
[add] tests refactoring per backend #296


Merged Feb 24, 2020

Commits (21)
29548e7
[add] tests refactoring per backend
filipecosta90 Feb 20, 2020
53e3fb0
[add] extended testing
filipecosta90 Feb 21, 2020
b87f518
Merge branch 'master' into test.refactor
filipecosta90 Feb 21, 2020
3e24a77
Merge remote-tracking branch 'origin/master' into test.refactor
filipecosta90 Feb 21, 2020
9c70df5
[add] added specific modelget and scriptget tests on slaves. added te…
filipecosta90 Feb 21, 2020
084bc40
[fix] disabled SCRIPTGET new test since it's hanging CI ( further inv…
filipecosta90 Feb 21, 2020
95a861b
[fix] disabled SCRIPTGET new test since it's hanging CI ( further inv…
filipecosta90 Feb 21, 2020
3664d70
[fix] fixing encoding issue on string comparison
filipecosta90 Feb 21, 2020
c8becdb
[add] refactored tests_common to test for tensorset and tensorget acr…
filipecosta90 Feb 21, 2020
1a5a567
[add] added modelrun scriptrun disconnect test cases (test for client…
filipecosta90 Feb 21, 2020
b1f0c03
[add] increased the GPU tests timeout since we added more tests
filipecosta90 Feb 21, 2020
bd5dd66
[add] added valgrind options to RLTest. pruned testing.
filipecosta90 Feb 21, 2020
358dde6
[add] added valgrind options to RLTest. pruned testing.
filipecosta90 Feb 21, 2020
fc20252
[fix] fixed leak on RedisAI_ReplicateTensorSet
filipecosta90 Feb 21, 2020
e492990
[fix] fixed leak on ret->devicestr in RAI_ModelCreateTorch
filipecosta90 Feb 21, 2020
ad006c6
[fix] skipping modelrun and scriptrun disconnect on gpu test
filipecosta90 Feb 22, 2020
d7a95f8
[add] tests prunning for ci
filipecosta90 Feb 22, 2020
74da560
[fix] fixing gpu tests for CI
filipecosta90 Feb 22, 2020
f5a4249
[add] hardened ensureSlaveSynced
filipecosta90 Feb 22, 2020
a74f397
[fix] fixed Makefile in accordance to PR review, [add] splitted tf mo…
filipecosta90 Feb 24, 2020
e163b7b
Delete MakefileCopy
filipecosta90 Feb 24, 2020
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -126,6 +126,7 @@ jobs:
command: |
mkdir -p ~/workspace/tests
docker run --gpus all -v $HOME/workspace/tests:/build/test/logs -it --rm redisai-gpu:latest-x64-bionic-test
+ no_output_timeout: 30m
- store_test_results:
path: ~/workspace/tests
deploy_package:
26 changes: 14 additions & 12 deletions opt/Makefile
@@ -55,7 +55,7 @@ BINDIR=$(BINROOT)/src
# INSTALL_DIR=$(ROOT)/install-$(DEVICE)
DEPS_DIR=$(ROOT)/deps/$(OS)-$(ARCH)-$(DEVICE)
INSTALL_DIR=$(ROOT)/bin/$(OS)-$(ARCH)-$(DEVICE)/install
- REDIS_VALGRID_SUPRESS=./redis_valgrind.sup
+ REDIS_VALGRID_SUPRESS=$(ROOT)/opt/redis_valgrind.sup
TARGET=$(BINDIR)/redisai.so

BACKENDS_PATH ?= $(INSTALL_DIR)/backends
@@ -147,22 +147,23 @@ ifeq ($(VERBOSE),1)
TEST_ARGS += -v
endif
ifeq ($(TEST),)
- TEST=basic_tests.py
+ TEST=
PYDEBUG=
else
- TEST_ARGS += -s
+ TEST_ARGS += -s --test $(TEST)
PYDEBUG=1
endif

- TEST_PREFIX=set -e; cd $(ROOT)/test
- TEST_CMD=\
- DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) \
- python3 -m RLTest $(TEST_ARGS) --test $(TEST) --module $(INSTALL_DIR)/redisai.so

GEN ?= 1
SLAVES ?= 1
AOF ?= 1

+ TEST_PREFIX=set -e; cd $(ROOT)/test
+ # TODO: --errors-for-leak-kinds=definite
+ VALGRIND_OPTIONS="--leak-check=full -q --show-reachable=no --show-possibly-lost=no"
+ TEST_CMD= DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) python3 -m RLTest $(TEST_ARGS) --module $(INSTALL_DIR)/redisai.so
+ VALGRIND_TEST_CMD= DEVICE=$(DEVICE) PYDEBUG=$(PYDEBUG) python3 -m RLTest $(TEST_ARGS) --module $(INSTALL_DIR)/redisai.so --no-output-catch --use-valgrind --vg-no-fail-on-errors --vg-verbose --vg-options $(VALGRIND_OPTIONS) --vg-suppressions $(realpath $(REDIS_VALGRID_SUPRESS))

test:
ifneq ($(NO_LFS),1)
$(SHOW)if [ "$(git lfs env > /dev/null 2>&1 ; echo $?)" != "0" ]; then cd $(ROOT); git lfs install; fi
@@ -179,6 +180,10 @@ ifeq ($(SLAVES),1)
$(SHOW)$(TEST_PREFIX); printf "\nTests with --use-slaves:\n\n" ;\
$(TEST_CMD) --use-slaves
endif
+ ifeq ($(VALGRIND),1)
+ $(SHOW)$(TEST_PREFIX); printf "\nTests with valgrind:\n\n" ;\
+ $(VALGRIND_TEST_CMD)
+ endif

#----------------------------------------------------------------------------------------------

@@ -192,10 +197,7 @@ MODULE_ARGS=\
TF redisai_tensorflow.so

VALGRIND_ARGS=\
- --leak-check=full \
- --show-reachable=no \
- --show-possibly-lost=no \
- --leak-check=full \
+ $(VALGRIND_OPTIONS) \
--suppressions=$(realpath $(REDIS_VALGRID_SUPRESS)) \
-v redis-server --protected-mode no --save "" --appendonly no

28 changes: 28 additions & 0 deletions opt/redis_valgrind.sup
@@ -1,3 +1,31 @@
{
ignore_unversioned_libs
Memcheck:Leak
...
obj:*/libtensorflow.so.*
}

{
ignore_unversioned_libs
Memcheck:Leak
...
obj:*/libonnxruntime.so.*
}

{
ignore_unversioned_libs
Memcheck:Leak
...
obj:*/libtorch.so.*
}

{
ignore_unversioned_libs
Memcheck:Leak
...
obj:*/libtorch.so*
}

{
<lzf_unitialized_hash_table>
Memcheck:Cond
3 changes: 3 additions & 0 deletions src/backends/torch.c
@@ -61,6 +61,9 @@ RAI_Model *RAI_ModelCreateTorch(RAI_Backend backend, const char* devicestr,
}

void RAI_ModelFreeTorch(RAI_Model* model, RAI_Error *error) {
+ if(model->devicestr){
+   RedisModule_Free(model->devicestr);
+ }
torchDeallocContext(model->model);
}

6 changes: 3 additions & 3 deletions src/redisai.c
@@ -831,9 +831,9 @@ void RedisAI_ReplicateTensorSet(RedisModuleCtx *ctx, RedisModuleString *key, RAI
RedisModule_Replicate(ctx, "AI.TENSORSET", "scvcb", key, dtypestr,
dims, ndims, "BLOB", data, size);

- // for (long long i=0; i<ndims; i++) {
- //   RedisModule_Free(dims[i]);
- // }
+ for (long long i=0; i<ndims; i++) {
+   RedisModule_FreeString(ctx,dims[i]);
+ }

RedisModule_Free(dtypestr);
}
Empty file added test/__init__.py
Empty file.
110 changes: 110 additions & 0 deletions test/includes.py
@@ -0,0 +1,110 @@
import json
import os
import random
import sys
import time
from multiprocessing import Process

import numpy as np
from skimage.io import imread
from skimage.transform import resize

try:
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../deps/readies"))
    import paella
except:
    pass

TEST_TF = os.environ.get("TEST_TF") != "0" and os.environ.get("WITH_TF") != "0"
TEST_TFLITE = os.environ.get("TEST_TFLITE") != "0" and os.environ.get("WITH_TFLITE") != "0"
TEST_PT = os.environ.get("TEST_PT") != "0" and os.environ.get("WITH_PT") != "0"
TEST_ONNX = os.environ.get("TEST_ONNX") != "0" and os.environ.get("WITH_ORT") != "0"
DEVICE = os.environ.get('DEVICE', 'CPU').upper().encode('utf-8', 'ignore').decode('utf-8')
VALGRIND = os.environ.get("VALGRIND") == "1"
print(f"Running tests on {DEVICE}\n")


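# Block until at least one replica acknowledges the preceding writes; a no-op unless RLTest is running with --use-slaves.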
def ensureSlaveSynced(con, env, timeout_ms=5000):
    if env.useSlaves:
        # When WAIT returns, all the previous write commands
        # sent in the context of the current connection are
        # guaranteed to be received by the number of replicas returned by WAIT.
        wait_reply = con.execute_command('WAIT', '1', timeout_ms)
        number_replicas = 0
        try:
            number_replicas = int(wait_reply)
        # does not contain anything convertible to int
        except ValueError as verr:
            pass
        # Exception occurred while converting to int
        except Exception as ex:
            pass
        env.assertTrue(number_replicas >= 1)


# Sends a command and forces a disconnect right after, without waiting
# for the reply to be parsed. Useful for checking the behaviour of
# commands that run on background threads.
def send_and_disconnect(cmd, red):
    pool = red.connection_pool
    con = pool.get_connection(cmd[0])
    ret = con.send_command(*cmd)
    con.disconnect()
    return ret


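# Return the exit status of 'which nvcc' (0 when the CUDA compiler is found on the PATH).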
def check_cuda():
    return os.system('which nvcc')


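# Convert a flat Redis reply of alternating keys and values into a dict, decoding bytes to str.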
def info_to_dict(info):
    info = [el.decode('utf-8') if type(el) is bytes else el for el in info]
    return dict(zip(info[::2], info[1::2]))


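# Load the frozen MobileNet protobuf, the ImageNet label map, and a 224x224 float32 sample image from test_data/.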
def load_mobilenet_test_data():
    test_data_path = os.path.join(os.path.dirname(__file__), 'test_data')
    labels_filename = os.path.join(test_data_path, 'imagenet_class_index.json')
    image_filename = os.path.join(test_data_path, 'panda.jpg')
    model_filename = os.path.join(test_data_path, 'mobilenet_v2_1.4_224_frozen.pb')

    with open(model_filename, 'rb') as f:
        model_pb = f.read()

    with open(labels_filename, 'r') as f:
        labels = json.load(f)

    img_height, img_width = 224, 224

    img = imread(image_filename)
    img = resize(img, (img_height, img_width), mode='constant', anti_aliasing=True)
    img = img.astype(np.float32)

    return model_pb, labels, img


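# Set the sample image as the 'input' tensor and run the 'mobilenet' model; the random sleep staggers concurrent callers.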
def run_mobilenet(con, img, input_var, output_var):
    time.sleep(0.5 * random.randint(0, 10))
    con.execute_command('AI.TENSORSET', 'input',
                        'FLOAT', 1, img.shape[1], img.shape[0], img.shape[2],
                        'BLOB', img.tobytes())

    con.execute_command('AI.MODELRUN', 'mobilenet',
                        'INPUTS', 'input', 'OUTPUTS', 'output')


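# Run fn(con, *args) in n_procs separate processes, each with its own connection to the env, and wait for all of them to finish.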
def run_test_multiproc(env, n_procs, fn, args=tuple()):
    procs = []

    def tmpfn():
        con = env.getConnection()
        fn(con, *args)
        return 1

    for _ in range(n_procs):
        p = Process(target=tmpfn)
        p.start()
        procs.append(p)

    [p.join() for p in procs]
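
For context, a minimal sketch of how these helpers can be combined in a per-backend test module; the test name, key names, and the RLTest-injected env argument below are illustrative assumptions rather than code from this change:

    # Minimal sketch, not part of this change: assumes RLTest injects `env` into
    # module-level test functions and that the test module sits next to includes.py.
    from includes import ensureSlaveSynced, send_and_disconnect

    def test_tensorset_sync_sketch(env):
        con = env.getConnection()
        con.execute_command('AI.TENSORSET', 'x', 'FLOAT', 2, 'VALUES', 2, 3)
        # Wait for at least one replica to acknowledge the write; this is a
        # no-op unless the suite was started with --use-slaves.
        ensureSlaveSynced(con, env)
        # Exercise the client-disconnect path without reading the reply.
        send_and_disconnect(('AI.TENSORSET', 'y', 'FLOAT', 2, 'VALUES', 4, 5), con)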