
Commit c6c3dd0

Merge remote-tracking branch 'origin/rinfo_leaks' into sanitizer-dagrun
2 parents: 3a98d77 + 2e885a6

17 files changed: +392 −75 lines changed


docs/commands.md

Lines changed: 11 additions & 1 deletion
@@ -174,6 +174,7 @@ _Arguments_
 * **device**: the device that will execute the model can be of:
   * **CPU**: a CPU device
   * **GPU**: a GPU device
+  * **GPU:0**, ..., **GPU:n**: a specific GPU device on a multi-GPU system
 * **TAG**: an optional string for tagging the model such as a version number or any arbitrary identifier
 * **BATCHSIZE**: when provided with an `n` that is greater than 0, the engine will batch incoming requests from multiple clients that use the model with input tensors of the same shape. When `AI.MODELRUN` is called, the request queue is visited and input tensors from compatible requests are concatenated along the 0th (batch) dimension until `n` is exceeded. The model is then run for the entire batch and the results are unpacked back to the individual requests, unblocking their respective clients. If the batch size of the inputs of the first request in the queue exceeds `BATCHSIZE`, the request is served immediately (default value: 0).
 * **MINBATCHSIZE**: when provided with an `m` that is greater than 0, the engine will postpone calls to `AI.MODELRUN` until the batch's size has reached `m`. This is primarily used to force batching during testing, but it can also be used under normal operation. In this case, note that requests for which `m` is not reached will hang indefinitely (default value: 0).
@@ -220,7 +221,7 @@ An array of alternating key-value pairs as follows:
 1. **MINBATCHSIZE**: The minimum size of any batch of incoming requests.
 1. **INPUTS**: array reply with one or more names of the model's input nodes (applicable only for TensorFlow models)
 1. **OUTPUTS**: array reply with one or more names of the model's output nodes (applicable only for TensorFlow models)
-1. **BLOB**: a blob containing the serialized model (when called with the `BLOB` argument) as a String
+1. **BLOB**: a blob containing the serialized model (when called with the `BLOB` argument) as a String. If the size of the serialized model exceeds `MODEL_CHUNK_SIZE` (see the `AI.CONFIG` command), then an array of chunks is returned. The full serialized model can be obtained by concatenating the chunks.

 **Examples**

@@ -361,6 +362,7 @@ _Arguments_
 * **device**: the device that will execute the model can be of:
   * **CPU**: a CPU device
   * **GPU**: a GPU device
+  * **GPU:0**, ..., **GPU:n**: a specific GPU device on a multi-GPU system
 * **script**: a string containing [TorchScript](https://pytorch.org/docs/stable/jit.html) source code

 _Return_
@@ -719,6 +721,7 @@ _Arguments_
   * **TFLITE**: The TensorFlow Lite backend
   * **TORCH**: The PyTorch backend
   * **ONNX**: ONNXRuntime backend
+* **MODEL_CHUNK_SIZE**: Sets the size of chunks (in bytes) in which model payloads are split for serialization, replication and `MODELGET`. Default is `511 * 1024 * 1024`.

 _Return_

@@ -746,3 +749,10 @@ This loads the PyTorch backend with a full path:
 redis> AI.CONFIG LOADBACKEND TORCH /usr/lib/redis/modules/redisai/backends/redisai_torch/redisai_torch.so
 OK
 ```
+
+This sets the model chunk size to one megabyte (not recommended):
+
+```
+redis> AI.CONFIG MODEL_CHUNK_SIZE 1048576
+OK
+```
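
The `BLOB` change above means a client of `AI.MODELGET` may receive either a single bulk string or an array of chunks. A minimal C sketch of the reassembly step, assuming the chunks have already been read into ordered buffers (the helper name and argument layout are illustrative, not part of the RedisAI or client API):

```c
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper: concatenate the chunk array returned by
 * AI.MODELGET ... BLOB back into one contiguous model payload.
 * `chunks` and `chunk_lens` would come from the client library's
 * array reply, in the order the chunks were returned. */
char *assemble_model_blob(const char **chunks, const size_t *chunk_lens,
                          size_t nchunks, size_t *out_len) {
    size_t total = 0;
    for (size_t i = 0; i < nchunks; i++) {
        total += chunk_lens[i];
    }
    char *blob = malloc(total);
    if (blob == NULL) {
        return NULL;
    }
    size_t offset = 0;
    for (size_t i = 0; i < nchunks; i++) {
        memcpy(blob + offset, chunks[i], chunk_lens[i]); /* chunks are in order */
        offset += chunk_lens[i];
    }
    *out_len = total;
    return blob;
}
```

A single-chunk reply degenerates to one `memcpy`, so the same path works whether or not the model exceeds `MODEL_CHUNK_SIZE`.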

src/backends/onnxruntime.c

Lines changed: 47 additions & 5 deletions
@@ -163,7 +163,7 @@ OrtValue* RAI_OrtValueFromTensors(RAI_Tensor** ts, size_t count, RAI_Error *error
     return NULL;
 }

-RAI_Tensor* RAI_TensorCreateFromOrtValue(OrtValue* v, size_t batch_offset, size_t batch_size, RAI_Error *error) {
+RAI_Tensor* RAI_TensorCreateFromOrtValue(OrtValue* v, size_t batch_offset, long long batch_size, RAI_Error *error) {
   OrtStatus* status = NULL;
   const OrtApi* ort = OrtGetApiBase()->GetApi(1);

@@ -206,6 +206,7 @@ RAI_Tensor* RAI_TensorCreateFromOrtValue(OrtValue* v, size_t batch_offset, size_
   if (status != NULL) goto error;

   int64_t total_batch_size = dims[0];
+  total_batch_size = total_batch_size > 0 ? total_batch_size : 1;

   shape = RedisModule_Calloc(ndims, sizeof(*shape));
   strides = RedisModule_Calloc(ndims, sizeof(*strides));
@@ -214,7 +215,12 @@ RAI_Tensor* RAI_TensorCreateFromOrtValue(OrtValue* v, size_t batch_offset, size_
     shape[i] = dims[i];
     strides[i] = 1;
   }
-  shape[0] = batch_size;
+  if (batch_size != -1) {
+    shape[0] = batch_size;
+  }
+  else {
+    batch_size = total_batch_size;
+  }
   for (int64_t i = ndims - 2; i >= 0; --i)
   {
     strides[i] *= strides[i + 1] * shape[i + 1];
@@ -411,9 +417,11 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error)

   size_t batch_sizes[nbatches];
   size_t batch_offsets[nbatches];
+  size_t total_batch_size = 0;
   if (array_len(mctxs[0]->inputs) > 0) {
     for (size_t b=0; b<nbatches; ++b) {
       batch_sizes[b] = RAI_TensorDim(mctxs[b]->inputs[0].tensor, 0);
+      total_batch_size += batch_sizes[b];
     }
     batch_offsets[0] = 0;
     for (size_t b=1; b<nbatches; ++b) {
@@ -529,14 +537,48 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error)
   }

   for (size_t i = 0; i < n_output_nodes; i++) {
-    for (size_t b=0; b<nbatches; b++) {
-      RAI_Tensor* output_tensor = RAI_TensorCreateFromOrtValue(outputs[i], batch_offsets[b], batch_sizes[b], error);
+    if (nbatches > 1) {
+      OrtTensorTypeAndShapeInfo* info;
+      status = ort->GetTensorTypeAndShape(outputs[i], &info);
+      if (status != NULL) goto error;
+
+      size_t ndims;
+      status = ort->GetDimensionsCount(info, &ndims);
+      if (status != NULL) goto error;
+
+      int64_t dims[ndims];
+      status = ort->GetDimensions(info, dims, ndims);
+      if (status != NULL) goto error;
+
+      if (dims[0] != total_batch_size) {
+        RAI_SetError(error, RAI_EMODELRUN, "ERR Model did not generate the expected batch size");
+        ort->ReleaseStatus(status);
+        return 1;
+      }
+
+      for (size_t b=0; b<nbatches; b++) {
+        RAI_Tensor* output_tensor = RAI_TensorCreateFromOrtValue(outputs[i], batch_offsets[b], batch_sizes[b], error);
+        if (error->code != RAI_OK) {
+          ort->ReleaseStatus(status);
+          return 1;
+        }
+        if (output_tensor) {
+          mctxs[b]->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
+          RAI_TensorFree(output_tensor);
+        }
+        else {
+          printf("ERR: non-tensor output from ONNX models, ignoring (currently unsupported)");
+        }
+      }
+    }
+    else {
+      RAI_Tensor* output_tensor = RAI_TensorCreateFromOrtValue(outputs[i], 0, -1, error);
       if (error->code != RAI_OK) {
         ort->ReleaseStatus(status);
         return 1;
       }
       if (output_tensor) {
-        mctxs[b]->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
+        mctxs[0]->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
         RAI_TensorFree(output_tensor);
       }
       else {
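
The run path above keeps one entry of `batch_sizes[]` per queued request, builds `batch_offsets[]` as its prefix sum, and rejects any output whose leading dimension differs from the summed batch size. A standalone sketch of that bookkeeping with illustrative names (the real code slices `OrtValue`s via `RAI_TensorCreateFromOrtValue`):

```c
#include <stdio.h>
#include <stddef.h>

/* Illustrative sketch of the batching bookkeeping in RAI_ModelRunORT:
 * batch_offsets[] is the prefix sum of batch_sizes[], and the batched
 * output's 0th dimension must equal the sum of all request batch sizes. */
int slice_batched_output(const size_t *batch_sizes, size_t nbatches,
                         long long output_dim0) {
    size_t batch_offsets[nbatches];
    size_t total_batch_size = 0;
    for (size_t b = 0; b < nbatches; ++b) {
        batch_offsets[b] = total_batch_size;   /* rows consumed so far */
        total_batch_size += batch_sizes[b];
    }
    if ((long long)total_batch_size != output_dim0) {
        /* mirrors the "Model did not generate the expected batch size" error */
        return 1;
    }
    for (size_t b = 0; b < nbatches; ++b) {
        printf("request %zu: rows [%zu, %zu)\n", b, batch_offsets[b],
               batch_offsets[b] + batch_sizes[b]);
    }
    return 0;
}

int main(void) {
    size_t sizes[] = {2, 3, 1};   /* three queued requests, 6 rows total */
    return slice_batched_output(sizes, 3, 6);
}
```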

src/backends/tensorflow.c

Lines changed: 28 additions & 5 deletions
@@ -79,7 +79,7 @@ DLDataType RAI_GetDLDataTypeFromTF(TF_DataType dtype) {
     return (DLDataType){ .bits = 0 };
 }

-RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset, size_t batch_size) {
+RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset, long long batch_size) {
   RAI_Tensor* ret = RedisModule_Calloc(1, sizeof(*ret));

   DLContext ctx = (DLContext){
@@ -89,15 +89,21 @@ RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset,

   const size_t ndims = TF_NumDims(tensor);

-  const int64_t total_batch_size = TF_Dim(tensor, 0);
+  int64_t total_batch_size = TF_Dim(tensor, 0);
+  total_batch_size = total_batch_size > 0 ? total_batch_size : 1;

   int64_t* shape = RedisModule_Calloc(ndims, sizeof(*shape));
   int64_t* strides = RedisModule_Calloc(ndims, sizeof(*strides));
   for (int64_t i = 0 ; i < ndims ; ++i) {
     shape[i] = TF_Dim(tensor, i);
     strides[i] = 1;
   }
-  shape[0] = batch_size;
+  if (batch_size != -1) {
+    shape[0] = batch_size;
+  }
+  else {
+    batch_size = total_batch_size;
+  }
   for (int64_t i = ndims-2 ; i >= 0 ; --i) {
     strides[i] *= strides[i+1] * shape[i+1];
   }
@@ -475,9 +481,11 @@ int RAI_ModelRunTF(RAI_ModelRunCtx** mctxs, RAI_Error *error) {

   size_t batch_sizes[nbatches];
   size_t batch_offsets[nbatches];
+  size_t total_batch_size = 0;
   if (ninputs > 0) {
     for (size_t b=0; b<nbatches; ++b) {
       batch_sizes[b] = RAI_TensorDim(mctxs[b]->inputs[0].tensor, 0);
+      total_batch_size += batch_sizes[b];
     }
     batch_offsets[0] = 0;
     for (size_t b=1; b<nbatches; ++b) {
@@ -531,8 +539,23 @@ int RAI_ModelRunTF(RAI_ModelRunCtx** mctxs, RAI_Error *error) {
   }

   for(size_t i=0; i<noutputs; ++i) {
-    for (size_t b=0; b<nbatches; b++) {
-      mctxs[b]->outputs[i].tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i], batch_offsets[b], batch_sizes[b]);
+    if (nbatches > 1) {
+      if (TF_NumDims(outputTensorsValues[i]) == 0) {
+        continue;
+      }
+      if (TF_Dim(outputTensorsValues[i], 0) != total_batch_size) {
+        TF_DeleteTensor(outputTensorsValues[i]);
+        TF_DeleteStatus(status);
+        RAI_SetError(error, RAI_EMODELRUN, "ERR Model did not generate the expected batch size");
+        return 1;
+      }
+
+      for (size_t b=0; b<nbatches; b++) {
+        mctxs[b]->outputs[i].tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i], batch_offsets[b], batch_sizes[b]);
+      }
+    }
+    else {
+      mctxs[0]->outputs[i].tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i], 0, -1);
     }
     TF_DeleteTensor(outputTensorsValues[i]);
   }

src/backends/torch.c

Lines changed: 5 additions & 1 deletion
@@ -93,9 +93,9 @@ int RAI_ModelRunTorch(RAI_ModelRunCtx** mctxs, RAI_Error *error) {

   size_t batch_sizes[nbatches];
   size_t batch_offsets[nbatches];
+  size_t total_batch_size = 0;

   if (nbatches > 1) {
-    size_t total_batch_size = 0;
     if (array_len(mctxs[0]->inputs) > 0) {
       for (size_t b=0; b<nbatches; ++b) {
         batch_sizes[b] = RAI_TensorDim(mctxs[b]->inputs[0].tensor, 0);
@@ -147,6 +147,10 @@ int RAI_ModelRunTorch(RAI_ModelRunCtx** mctxs, RAI_Error *error) {
     }
     RAI_Tensor* output_tensor = RAI_TensorCreateFromDLTensor(outputs_dl[i]);
     if (nbatches > 1) {
+      if (outputs_dl[i]->dl_tensor.shape[0] != total_batch_size) {
+        RAI_SetError(error, RAI_EMODELRUN, "ERR Model did not generate the expected batch size");
+        return 1;
+      }
       for (size_t b=0; b<nbatches; b++) {
         mctxs[b]->outputs[i].tensor = RAI_TensorCreateBySlicingTensor(output_tensor, batch_offsets[b], batch_sizes[b]);
       }
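
All three backends now pass `-1` as the `batch_size` sentinel when there is a single request, and an explicit per-request size when un-batching. A minimal sketch of what the sentinel does to the shape/stride computation, kept standalone and illustrative (the real helpers operate on `TF_Tensor`, `OrtValue` and `DLTensor` objects):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch of the batch_size sentinel introduced in this change:
 * -1 means "single request, keep the tensor's own leading dimension";
 * any other value overwrites dim 0 with that request's slice size before
 * the row-major strides are recomputed. */
void fill_shape_and_strides(const int64_t *dims, size_t ndims,
                            long long batch_size,
                            int64_t *shape, int64_t *strides) {
    for (size_t i = 0; i < ndims; ++i) {
        shape[i] = dims[i];
        strides[i] = 1;
    }
    if (batch_size != -1) {
        shape[0] = batch_size; /* one request's slice of the batched output */
    }
    for (int64_t i = (int64_t)ndims - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * shape[i + 1];
    }
}

int main(void) {
    int64_t dims[] = {6, 4};          /* batched output: 6 rows of 4 values */
    int64_t shape[2], strides[2];
    fill_shape_and_strides(dims, 2, 2, shape, strides); /* request with 2 rows */
    printf("shape=[%lld,%lld] strides=[%lld,%lld]\n",
           (long long)shape[0], (long long)shape[1],
           (long long)strides[0], (long long)strides[1]);
    return 0;
}
```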

src/config.c

Lines changed: 61 additions & 20 deletions
@@ -20,6 +20,7 @@ long long backends_intra_op_parallelism; // number of threads used within an
 long long
     backends_inter_op_parallelism; // number of threads used for parallelism
                                    // between independent operations.
+long long model_chunk_size;        // size of chunks used to break up model payloads.

 /**
  *
@@ -69,6 +70,30 @@ int setBackendsIntraOpParallelism(long long num_threads) {
     return result;
 }

+/**
+ * @return size of chunks (in bytes) in which models are split for
+ * set, get, serialization and replication.
+ */
+long long getModelChunkSize() {
+    return model_chunk_size;
+}
+
+/**
+ * Set size of chunks (in bytes) in which models are split for set,
+ * get, serialization and replication.
+ *
+ * @param size
+ * @return 0 on success, or 1 if failed
+ */
+int setModelChunkSize(long long size) {
+    int result = 1;
+    if (size > 0) {
+        model_chunk_size = size;
+        result = 0;
+    }
+    return result;
+}
+
 /**
  * Helper method for AI.CONFIG LOADBACKEND <backend_identifier>
  * <location_of_backend_library>
@@ -175,6 +200,26 @@ int RedisAI_Config_IntraOperationParallelism(
     return result;
 }

+/**
+ * Set size of chunks in which model payloads are split for set,
+ * get, serialization and replication.
+ *
+ * @param chunk_size_string string containing chunk size (in bytes)
+ * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed
+ */
+int RedisAI_Config_ModelChunkSize(RedisModuleString *chunk_size_string) {
+    long long temp;
+    int result = RedisModule_StringToLongLong(chunk_size_string, &temp);
+    // make sure chunk size is a positive integer
+    // if not set the value to the default
+    if (result == REDISMODULE_OK && temp < 1) {
+        temp = REDISAI_DEFAULT_MODEL_CHUNK_SIZE;
+        result = REDISMODULE_ERR;
+    }
+    result = setModelChunkSize(temp);
+    return result;
+}
+
 /**
  *
  * @param ctx Context in which Redis modules operate
@@ -199,34 +244,30 @@ int RAI_configParamParse(RedisModuleCtx *ctx, const char *key,
     else if (strcasecmp((key), "THREADS_PER_QUEUE") == 0) {
         ret = RedisAI_Config_QueueThreads(rsval);
         if (ret == REDISMODULE_OK) {
-            char *buffer = RedisModule_Alloc(
-                (3 + strlen(REDISAI_INFOMSG_THREADS_PER_QUEUE) + strlen((val))) *
-                sizeof(*buffer));
-            sprintf(buffer, "%s: %s", REDISAI_INFOMSG_THREADS_PER_QUEUE, (val));
-            RedisModule_Log(ctx, "notice", buffer);
-            RedisModule_Free(buffer);
+            RedisModule_Log(ctx, "notice", "%s: %s",
+                            REDISAI_INFOMSG_THREADS_PER_QUEUE,
+                            (val));
         }
     } else if (strcasecmp((key), "INTRA_OP_PARALLELISM") == 0) {
         ret = RedisAI_Config_IntraOperationParallelism(rsval);
         if (ret == REDISMODULE_OK) {
-            char *buffer = RedisModule_Alloc(
-                (3 + strlen(REDISAI_INFOMSG_INTRA_OP_PARALLELISM) + strlen((val))) *
-                sizeof(*buffer));
-            sprintf(buffer, "%s: %lld", REDISAI_INFOMSG_INTRA_OP_PARALLELISM,
-                    getBackendsIntraOpParallelism());
-            RedisModule_Log(ctx, "notice", buffer);
-            RedisModule_Free(buffer);
+            RedisModule_Log(ctx, "notice", "%s: %lld",
+                            REDISAI_INFOMSG_INTRA_OP_PARALLELISM,
+                            getBackendsIntraOpParallelism());
         }
     } else if (strcasecmp((key), "INTER_OP_PARALLELISM") == 0) {
         ret = RedisAI_Config_InterOperationParallelism(rsval);
         if (ret == REDISMODULE_OK) {
-            char *buffer = RedisModule_Alloc(
-                (3 + strlen(REDISAI_INFOMSG_INTER_OP_PARALLELISM) + strlen((val))) *
-                sizeof(*buffer));
-            sprintf(buffer, "%s: %lld", REDISAI_INFOMSG_INTER_OP_PARALLELISM,
-                    getBackendsInterOpParallelism());
-            RedisModule_Log(ctx, "notice", buffer);
-            RedisModule_Free(buffer);
+            RedisModule_Log(ctx, "notice", "%s: %lld",
+                            REDISAI_INFOMSG_INTER_OP_PARALLELISM,
+                            getBackendsInterOpParallelism());
+        }
+    } else if (strcasecmp((key), "MODEL_CHUNK_SIZE") == 0) {
+        ret = RedisAI_Config_ModelChunkSize(rsval);
+        if (ret == REDISMODULE_OK) {
+            RedisModule_Log(ctx, "notice", "%s: %lld",
+                            REDISAI_INFOMSG_MODEL_CHUNK_SIZE,
+                            getModelChunkSize());
         }
     } else if (strcasecmp((key), "BACKENDSPATH") == 0) {
         // already taken care of
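
With the configuration wired up, the number of chunks a model payload produces is a ceiling division of its serialized length by `MODEL_CHUNK_SIZE`. A small sketch of that arithmetic; the constant name and its default value come from the diff, while the helper itself is illustrative:

```c
#include <stdio.h>

/* Default from the diff: 511 * 1024 * 1024 bytes. */
#define REDISAI_DEFAULT_MODEL_CHUNK_SIZE (511LL * 1024 * 1024)

/* Illustrative: how many chunks a serialized model of `model_len` bytes
 * is split into for MODELGET, serialization and replication. */
long long chunk_count(long long model_len, long long chunk_size) {
    return (model_len + chunk_size - 1) / chunk_size;  /* ceiling division */
}

int main(void) {
    long long model_len = 1300LL * 1024 * 1024;  /* a 1.3 GB model blob */
    printf("default chunks: %lld\n",
           chunk_count(model_len, REDISAI_DEFAULT_MODEL_CHUNK_SIZE));  /* 3 */
    printf("1 MB chunks:    %lld\n",
           chunk_count(model_len, 1048576));                           /* 1300 */
    return 0;
}
```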
