Skip to content

Commit bd9f462

Browse files
author
DvirDukhan
authored
Merge pull request #553 from RedisAI/fix_memory_leaks
Fix memory leaks
2 parents 8243e8d + 340435d commit bd9f462

File tree

22 files changed

+426
-592
lines changed

22 files changed

+426
-592
lines changed

opt/redis_valgrind.sup

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
obj:*/libtensorflow.so.*
66
}
77

8+
{
9+
ignore_unversioned_libs
10+
Memcheck:Leak
11+
...
12+
obj:*/libtensorflow_framework.so.*
13+
}
14+
815
{
916
ignore_unversioned_libs
1017
Memcheck:Leak
@@ -54,17 +61,3 @@
5461
fun:RAI_LoadBackend
5562
}
5663

57-
{
58-
<tf-operator new>
59-
Memcheck:Leak
60-
...
61-
fun:clone
62-
}
63-
64-
{
65-
<malloc>
66-
Memcheck:Leak
67-
fun:malloc
68-
...
69-
fun:clone
70-
}

src/DAG/dag.c

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ static void Dag_LoadInputsToModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *curre
9191

9292
static void Dag_StoreOutputsFromModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) {
9393

94-
RAI_ContextReadLock(rinfo);
94+
RAI_ContextWriteLock(rinfo);
9595
const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx);
9696
for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) {
9797
RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber);
@@ -177,6 +177,9 @@ void RedisAI_BatchedDagRunSession_ModelRun_Step(RedisAI_RunInfo **batched_rinfo,
177177
if (rinfo->single_op_dag == 0)
178178
Dag_StoreOutputsFromModelRunCtx(rinfo, currentOp);
179179
}
180+
// Clear the result in case of an error.
181+
if (result == REDISMODULE_ERR)
182+
RAI_ClearError(&err);
180183
}
181184

182185
/**
@@ -346,16 +349,20 @@ int RAI_DagOpBatchable(RAI_DagOp *op1, RedisAI_RunInfo *rinfo1, RAI_DagOp *op2,
346349
return 1;
347350
}
348351

349-
int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) {
352+
bool RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) {
350353
return rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount;
351354
}
352355

353-
int RedisAI_DagComplete(RedisAI_RunInfo *rinfo) {
356+
bool RedisAI_DagComplete(RedisAI_RunInfo *rinfo) {
354357
int completeOpCount = __atomic_load_n(rinfo->dagCompleteOpCount, __ATOMIC_RELAXED);
355358

356359
return completeOpCount == rinfo->dagOpCount;
357360
}
358361

362+
bool RedisAI_DagError(RedisAI_RunInfo *rinfo) {
363+
return __atomic_load_n(rinfo->dagError, __ATOMIC_RELAXED) != 0;
364+
}
365+
359366
RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) {
360367
if (rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount) {
361368
return NULL;
@@ -364,21 +371,21 @@ RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) {
364371
return rinfo->dagDeviceOps[rinfo->dagDeviceCompleteOpCount];
365372
}
366373

367-
void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady,
368-
int *currentOpBatchable) {
374+
void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, bool *currentOpReady,
375+
bool *currentOpBatchable) {
369376
RAI_DagOp *currentOp_ = RedisAI_DagCurrentOp(rinfo);
370377

371-
*currentOpReady = 0;
372-
*currentOpBatchable = 0;
378+
*currentOpReady = false;
379+
*currentOpBatchable = false;
373380

374381
if (currentOp_ == NULL) {
375382
return;
376383
}
377384

378385
if (currentOp_->mctx && currentOp_->mctx->model->opts.batchsize > 0) {
379-
*currentOpBatchable = 1;
386+
*currentOpBatchable = true;
380387
}
381-
*currentOpReady = 1;
388+
*currentOpReady = true;
382389
// If this is a single op dag, the op is definitely ready.
383390
if (rinfo->single_op_dag == 1)
384391
return;
@@ -389,7 +396,7 @@ void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady,
389396
for (int i = 0; i < n_inkeys; i++) {
390397
if (AI_dictFind(rinfo->dagTensorsContext, currentOp_->inkeys[i]) == NULL) {
391398
RAI_ContextUnlock(rinfo);
392-
*currentOpReady = 0;
399+
*currentOpReady = false;
393400
return;
394401
}
395402
}
@@ -577,7 +584,6 @@ static void _ModelSingleOp_PersistTensors(RedisModuleCtx *ctx, RAI_DagOp *op) {
577584
const size_t noutputs = RAI_ModelRunCtxNumOutputs(op->mctx);
578585
for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) {
579586
RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(op->mctx, outputNumber);
580-
tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL;
581587
if (tensor)
582588
_StoreTensorInKeySpace(ctx, tensor, op->outkeys[outputNumber], false);
583589
}
@@ -587,7 +593,6 @@ static void _ScriptSingleOp_PersistTensors(RedisModuleCtx *ctx, RAI_DagOp *op) {
587593
const size_t noutputs = RAI_ScriptRunCtxNumOutputs(op->sctx);
588594
for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) {
589595
RAI_Tensor *tensor = RAI_ScriptRunCtxOutputTensor(op->sctx, outputNumber);
590-
tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL;
591596
if (tensor)
592597
_StoreTensorInKeySpace(ctx, tensor, op->outkeys[outputNumber], false);
593598
}
@@ -600,7 +605,6 @@ int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc
600605

601606
if (RAI_GetErrorCode(rinfo->err) == RAI_EDAGRUN) {
602607
RedisModule_ReplyWithError(ctx, RAI_GetErrorOneLine(rinfo->err));
603-
RAI_FreeRunInfo(rinfo);
604608
return REDISMODULE_ERR;
605609
}
606610
int dag_error = 0;
@@ -610,7 +614,6 @@ int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc
610614

611615
if (*rinfo->timedOut) {
612616
RedisModule_ReplyWithSimpleString(ctx, "TIMEDOUT");
613-
RAI_FreeRunInfo(rinfo);
614617
return REDISMODULE_OK;
615618
}
616619

@@ -701,7 +704,6 @@ int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc
701704
if (rinfo->single_op_dag == 0) {
702705
RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength);
703706
}
704-
RAI_FreeRunInfo(rinfo);
705707
return REDISMODULE_ERR;
706708
}
707709

@@ -718,7 +720,6 @@ int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc
718720
}
719721
}
720722

721-
RAI_FreeRunInfo(rinfo);
722723
return REDISMODULE_OK;
723724
}
724725

@@ -746,11 +747,7 @@ int RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, RedisMo
746747
return REDISMODULE_OK;
747748
}
748749

749-
void RunInfo_FreeData(RedisModuleCtx *ctx, void *rinfo) {}
750-
751-
void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) {
752-
RedisModule_Log(ctx, "warning", "Blocked client %p disconnected!", (void *)bc);
753-
}
750+
void RunInfo_FreeData(RedisModuleCtx *ctx, void *rinfo) { RAI_FreeRunInfo(rinfo); }
754751

755752
// Add Shallow copies of the DAG run info to the devices' queues.
756753
// Return REDISMODULE_OK in case of success, REDISMODULE_ERR if (at least) one insert op had

src/DAG/dag.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,25 @@
1919
* successfully. Since rinfo carries information on what queue
2020
* it has been placed in, there's no need to pass the device identifier.
2121
* @param rinfo context in which RedisAI blocking commands operate.
22-
* @return nonzero if all ops are complete for device, 0 otherwise
22+
* @return true if all ops are complete for device, 0 otherwise
2323
*/
24-
int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo);
24+
bool RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo);
2525

2626
/**
2727
* Get whether all DAG ops have been executed successfully irrespective
2828
* of the device, i.e. if the DAG has been completely executed.
2929
* @param rinfo context in which RedisAI blocking commands operate.
30-
* @return nonzero of all ops in DAG are complete, 0 otherwise
30+
* @return true of all ops in DAG are complete, 0 otherwise
3131
*/
32-
int RedisAI_DagComplete(RedisAI_RunInfo *rinfo);
32+
bool RedisAI_DagComplete(RedisAI_RunInfo *rinfo);
33+
34+
/**
35+
* @brief Get an indication if an error happend during the dag run.
36+
*
37+
* @param rinfo context in which RedisAI blocking commands operate.
38+
* @return true if there was an error
39+
*/
40+
bool RedisAI_DagError(RedisAI_RunInfo *rinfo);
3341

3442
/**
3543
* Get current DAG op for the given device. An op is current if it's
@@ -50,7 +58,8 @@ RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo);
5058
* a MODELRUN and is BATCHSIZE greater than zero
5159
* @return
5260
*/
53-
void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady, int *currentOpBatchable);
61+
void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, bool *currentOpReady,
62+
bool *currentOpBatchable);
5463

5564
/**
5665
* Get batching information about a DAG op.
@@ -142,9 +151,4 @@ int DAG_InsertDAGToQueue(RedisAI_RunInfo *rinfo);
142151
*/
143152
void RunInfo_FreeData(RedisModuleCtx *ctx, void *rinfo);
144153

145-
/**
146-
* @brief A callback to send to BlockClient.
147-
*/
148-
void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc);
149-
150154
#endif /* SRC_DAG_H_ */

src/backends.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ RedisModuleString *RAI_GetBackendsPath(RedisModuleCtx *ctx) {
3939
RedisModuleString *module_path = RAI_GetModulePath(ctx);
4040
backends_path = RedisModule_CreateStringPrintf(ctx, "%s/backends",
4141
RedisModule_StringPtrLen(module_path, NULL));
42+
RedisModule_FreeString(ctx, module_path);
4243
}
4344

4445
return backends_path;
@@ -422,21 +423,26 @@ int RAI_LoadBackend(RedisModuleCtx *ctx, int backend, const char *path) {
422423
RedisModuleString *backends_path = RAI_GetBackendsPath(ctx);
423424
fullpath = RedisModule_CreateStringPrintf(
424425
ctx, "%s/%s", RedisModule_StringPtrLen(backends_path, NULL), path);
426+
RedisModule_FreeString(ctx, backends_path);
425427
}
426428

427429
int ret;
428430
switch (backend) {
429431
case RAI_BACKEND_TENSORFLOW:
430-
return RAI_LoadBackend_TensorFlow(ctx, RedisModule_StringPtrLen(fullpath, NULL));
432+
ret = RAI_LoadBackend_TensorFlow(ctx, RedisModule_StringPtrLen(fullpath, NULL));
433+
break;
431434
case RAI_BACKEND_TFLITE:
432-
return RAI_LoadBackend_TFLite(ctx, RedisModule_StringPtrLen(fullpath, NULL));
435+
ret = RAI_LoadBackend_TFLite(ctx, RedisModule_StringPtrLen(fullpath, NULL));
436+
break;
433437
case RAI_BACKEND_TORCH:
434-
return RAI_LoadBackend_Torch(ctx, RedisModule_StringPtrLen(fullpath, NULL));
438+
ret = RAI_LoadBackend_Torch(ctx, RedisModule_StringPtrLen(fullpath, NULL));
439+
break;
435440
case RAI_BACKEND_ONNXRUNTIME:
436-
return RAI_LoadBackend_ONNXRuntime(ctx, RedisModule_StringPtrLen(fullpath, NULL));
441+
ret = RAI_LoadBackend_ONNXRuntime(ctx, RedisModule_StringPtrLen(fullpath, NULL));
442+
break;
437443
}
438-
439-
return REDISMODULE_ERR;
444+
RedisModule_FreeString(ctx, fullpath);
445+
return ret;
440446
}
441447

442448
int RAI_LoadDefaultBackend(RedisModuleCtx *ctx, int backend) {

0 commit comments

Comments
 (0)