Skip to content

Commit fab4d3a

Browse files
committed
Make TF tests work
1 parent 087132e commit fab4d3a

File tree

6 files changed

+236
-81
lines changed

6 files changed

+236
-81
lines changed

src/backends/onnxruntime.c

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -358,22 +358,19 @@ int RAI_ModelRunORT(RAI_ModelRunCtx *mctx, RAI_Error *error)
358358

359359
OrtAllocator *allocator;
360360
status = ort->GetAllocatorWithDefaultOptions(&allocator);
361-
if (status != NULL)
362-
{
361+
if (status != NULL) {
363362
goto error;
364363
}
365364

366365
size_t n_input_nodes;
367366
status = ort->SessionGetInputCount(session, &n_input_nodes);
368-
if (status != NULL)
369-
{
367+
if (status != NULL) {
370368
goto error;
371369
}
372370

373371
size_t n_output_nodes;
374372
status = ort->SessionGetOutputCount(session, &n_output_nodes);
375-
if (status != NULL)
376-
{
373+
if (status != NULL) {
377374
goto error;
378375
}
379376

@@ -387,37 +384,31 @@ int RAI_ModelRunORT(RAI_ModelRunCtx *mctx, RAI_Error *error)
387384
size_t ninputs = array_len(mctx->inputs);
388385
size_t noutputs = array_len(mctx->outputs);
389386

390-
if (ninputs != n_input_nodes)
391-
{
392-
387+
if (ninputs != n_input_nodes) {
393388
char msg[70];
394389
sprintf(msg, "Expected %li inputs but got %li", n_input_nodes, ninputs);
395390
RAI_SetError(error, RAI_EMODELRUN, msg);
396391
return 1;
397392
}
398393

399-
if (noutputs != n_output_nodes)
400-
{
394+
if (noutputs != n_output_nodes) {
401395
char msg[70];
402396
sprintf(msg, "Expected %li outputs but got %li", n_output_nodes, noutputs);
403397
RAI_SetError(error, RAI_EMODELRUN, msg);
404398
return 1;
405399
}
406400

407-
for (size_t i = 0; i < n_input_nodes; i++)
408-
{
401+
for (size_t i = 0; i < n_input_nodes; i++) {
409402
char *input_name;
410403
status = ort->SessionGetInputName(session, i, allocator, &input_name);
411-
if (status != NULL)
412-
{
404+
if (status != NULL) {
413405
goto error;
414406
}
415407

416408
input_names[i] = input_name;
417409

418410
inputs[i] = RAI_OrtValueFromTensor(mctx->inputs[i].tensor, error);
419-
if (error->code != RAI_OK)
420-
{
411+
if (error->code != RAI_OK) {
421412
ort->ReleaseStatus(status);
422413
return 1;
423414
}
@@ -442,12 +433,10 @@ int RAI_ModelRunORT(RAI_ModelRunCtx *mctx, RAI_Error *error)
442433
#endif
443434
}
444435

445-
for (size_t i = 0; i < n_output_nodes; i++)
446-
{
436+
for (size_t i = 0; i < n_output_nodes; i++) {
447437
char *output_name;
448438
status = ort->SessionGetOutputName(session, i, allocator, &output_name);
449-
if (status != NULL)
450-
{
439+
if (status != NULL) {
451440
goto error;
452441
}
453442

@@ -463,33 +452,27 @@ int RAI_ModelRunORT(RAI_ModelRunCtx *mctx, RAI_Error *error)
463452
status = ort->Run(session, run_options, input_names, (const OrtValue *const *)inputs,
464453
n_input_nodes, output_names, n_output_nodes, outputs);
465454

466-
if (status)
467-
{
455+
if (status) {
468456
goto error;
469457
}
470458

471-
for (size_t i = 0; i < n_output_nodes; i++)
472-
{
459+
for (size_t i = 0; i < n_output_nodes; i++) {
473460
RAI_Tensor *output_tensor = RAI_TensorCreateFromOrtValue(outputs[i], error);
474-
if (error->code != RAI_OK)
475-
{
461+
if (error->code != RAI_OK) {
476462
ort->ReleaseStatus(status);
477463
return 1;
478464
}
479-
if (output_tensor)
480-
{
465+
if (output_tensor) {
481466
mctx->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
482467
RAI_TensorFree(output_tensor);
483468
}
484-
else
485-
{
469+
else {
486470
printf("ERR: non-tensor output from ONNX models, ignoring (currently unsupported).\n");
487471
}
488472
ort->ReleaseValue(outputs[i]);
489473
}
490474

491-
for (size_t i = 0; i < n_input_nodes; i++)
492-
{
475+
for (size_t i = 0; i < n_input_nodes; i++) {
493476
ort->ReleaseValue(inputs[i]);
494477
}
495478

src/backends/tensorflow.c

Lines changed: 102 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ DLDataType RAI_GetDLDataTypeFromTF(TF_DataType dtype) {
7878
return (DLDataType){ .bits = 0 };
7979
}
8080

81-
RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor) {
81+
RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset, size_t batch_size) {
8282
RAI_Tensor* ret = RedisModule_Calloc(1, sizeof(*ret));
8383

8484
DLContext ctx = (DLContext){
@@ -88,25 +88,30 @@ RAI_Tensor* RAI_TensorCreateFromTFTensor(TF_Tensor *tensor) {
8888

8989
size_t ndims = TF_NumDims(tensor);
9090

91+
int64_t total_batch_size = TF_Dim(tensor, 0);
92+
9193
int64_t* shape = RedisModule_Calloc(ndims, sizeof(*shape));
9294
int64_t* strides = RedisModule_Calloc(ndims, sizeof(*strides));
9395
for (int64_t i = 0 ; i < ndims ; ++i) {
9496
shape[i] = TF_Dim(tensor, i);
9597
strides[i] = 1;
9698
}
99+
shape[0] = batch_size;
97100
for (int64_t i = ndims-2 ; i >= 0 ; --i) {
98101
strides[i] *= strides[i+1] * shape[i+1];
99102
}
100103

104+
size_t sample_bytesize = TF_TensorByteSize(tensor) / total_batch_size;
105+
101106
// FIXME: In TF, RunSession allocates memory for output tensors
102107
// This means that we either memcpy the tensor data and let
103108
// Redis be responsible for the memory, or we reuse the TF
104109
// allocated memory, which might not be optimal down the road
105110
// Note: on YOLO this has no impact on perf
106111
#ifdef RAI_COPY_RUN_OUTPUT
107-
size_t len = TF_TensorByteSize(tensor);
112+
size_t len = sample_bytesize * batch_size;
108113
char* data = RedisModule_Calloc(len, sizeof(*data));
109-
memcpy(data, TF_TensorData(tensor), len);
114+
memcpy(data, TF_TensorData(tensor) + sample_bytesize * batch_offset, len);
110115
#endif
111116

112117
// TODO: use manager_ctx to ensure TF tensor doesn't get deallocated
@@ -160,6 +165,64 @@ TF_Tensor* RAI_TFTensorFromTensor(RAI_Tensor* t){
160165
#endif /* RAI_COPY_RUN_INPUT */
161166
}
162167

168+
// Concatenate `count` RAI tensors along the batch (first) dimension into one
// newly allocated TF tensor. All tensors are assumed to share dtype and
// non-batch dimensions with ts[0] — TODO(review): confirm callers guarantee
// this; nothing here validates it.
//
// Returns NULL when count == 0. Ownership of the returned TF_Tensor passes to
// the caller, who must release it with TF_DeleteTensor.
TF_Tensor* RAI_TFTensorFromTensors(RAI_Tensor** ts, size_t count){
  if (count == 0) {
    return NULL;
  }

  RAI_Tensor* t0 = ts[0];

  // Total batch rows and total payload bytes across all input tensors.
  int64_t batch_size = 0;
  size_t batch_byte_size = 0;
  for (size_t i = 0; i < count; i++) {
    batch_size += ts[i]->tensor.dl_tensor.shape[0];
    batch_byte_size += RAI_TensorByteSize(ts[i]);
  }

  const int ndim = t0->tensor.dl_tensor.ndim;
  int64_t batched_shape[ndim];
  for (int i = 0; i < ndim; i++) {
    batched_shape[i] = t0->tensor.dl_tensor.shape[i];
  }
  batched_shape[0] = batch_size;

  // BUG FIX: allocate room for the WHOLE batch. The previous code passed
  // RAI_TensorByteSize(t0) — the size of the first tensor only — so the
  // copy loop below overflowed the TF-owned buffer whenever count > 1.
  TF_Tensor* out = TF_AllocateTensor(
      RAI_GetTFDataTypeFromDL(t0->tensor.dl_tensor.dtype),
      batched_shape,
      ndim,
      batch_byte_size);

  // Pack each input tensor's payload back-to-back into the batched buffer.
  size_t offset = 0;
  for (size_t i = 0; i < count; i++) {
    size_t tbytesize = RAI_TensorByteSize(ts[i]);
    memcpy(TF_TensorData(out) + offset, ts[i]->tensor.dl_tensor.data, tbytesize);
    offset += tbytesize;
  }

  return out;
}
225+
163226

164227
RAI_Model *RAI_ModelCreateTF(RAI_Backend backend, const char* devicestr,
165228
size_t ninputs, const char **inputs,
@@ -359,17 +422,41 @@ void RAI_ModelFreeTF(RAI_Model* model, RAI_Error* error) {
359422

360423
int RAI_ModelRunTF(RAI_ModelRunCtx* mctx, RAI_Error *error) {
361424
TF_Status *status = TF_NewStatus();
362-
const size_t ninputs = array_len(mctx->inputs);
363-
const size_t noutputs = array_len(mctx->outputs);
425+
const size_t nbatches = array_len(mctx->batches);
426+
427+
if (nbatches == 0) {
428+
return 1;
429+
}
430+
431+
const size_t ninputs = array_len(mctx->batches[0].inputs);
432+
const size_t noutputs = array_len(mctx->batches[0].outputs);
364433
TF_Tensor* inputTensorsValues[ninputs];
365434
TF_Output inputs[ninputs];
366435
TF_Tensor* outputTensorsValues[noutputs];
367436
TF_Output outputs[noutputs];
368437

369-
for (size_t i=0 ; i<ninputs; ++i) {
370-
inputTensorsValues[i] = RAI_TFTensorFromTensor(mctx->inputs[i].tensor);
438+
size_t batch_sizes[nbatches];
439+
size_t batch_offsets[nbatches];
440+
if (array_len(mctx->batches[0].inputs) > 0) {
441+
for (size_t b=0; b<nbatches; ++b) {
442+
batch_sizes[b] = RAI_TensorDim(mctx->batches[b].inputs[0].tensor, 0);
443+
}
444+
batch_offsets[0] = 0;
445+
for (size_t b=1; b<nbatches; ++b) {
446+
batch_offsets[b] = batch_sizes[b-1];
447+
}
448+
}
449+
450+
for (size_t i=0; i<ninputs; ++i) {
451+
RAI_Tensor* batched_input_tensors[nbatches];
452+
453+
for (size_t b=0; b<nbatches; ++b) {
454+
batched_input_tensors[b] = mctx->batches[b].inputs[i].tensor;
455+
}
456+
// inputTensorsValues[i] = RAI_TFTensorFromTensor(mctx->inputs[i].tensor);
457+
inputTensorsValues[i] = RAI_TFTensorFromTensors(batched_input_tensors, nbatches);
371458
TF_Output port;
372-
port.oper = TF_GraphOperationByName(mctx->model->model, mctx->inputs[i].name);
459+
port.oper = TF_GraphOperationByName(mctx->model->model, mctx->batches[0].inputs[i].name);
373460
port.index = 0;
374461
if(port.oper == NULL){
375462
return 1;
@@ -379,7 +466,7 @@ int RAI_ModelRunTF(RAI_ModelRunCtx* mctx, RAI_Error *error) {
379466

380467
for (size_t i=0 ; i<noutputs; ++i) {
381468
TF_Output port;
382-
port.oper = TF_GraphOperationByName(mctx->model->model, mctx->outputs[i].name);
469+
port.oper = TF_GraphOperationByName(mctx->model->model, mctx->batches[0].outputs[i].name);
383470
port.index = 0;
384471
if(port.oper == NULL){
385472
return 1;
@@ -406,11 +493,13 @@ int RAI_ModelRunTF(RAI_ModelRunCtx* mctx, RAI_Error *error) {
406493
return 1;
407494
}
408495

409-
for(size_t i = 0 ; i < noutputs ; ++i) {
410-
RAI_Tensor* output_tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i]);
496+
for(size_t i=0; i<noutputs; ++i) {
497+
for (size_t b=0; b<nbatches; b++) {
498+
RAI_Tensor* output_tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i], batch_offsets[b], batch_sizes[b]);
499+
mctx->batches[b].outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
500+
RAI_TensorFree(output_tensor);
501+
}
411502
TF_DeleteTensor(outputTensorsValues[i]);
412-
mctx->outputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor);
413-
RAI_TensorFree(output_tensor);
414503
}
415504

416505
// TODO: add (make sure we deallocate once)

0 commit comments

Comments
 (0)