diff --git a/get_deps.sh b/get_deps.sh index 36a9c3854..4a70f536e 100755 --- a/get_deps.sh +++ b/get_deps.sh @@ -70,7 +70,7 @@ MKL=mkl ONNXRUNTIME=onnxruntime ######################################################################################## DLPACK -DLPACK_VERSION="v0.3" +DLPACK_VERSION="v0.5" if [[ $WITH_DLPACK != 0 ]]; then [[ $FORCE == 1 ]] && rm -rf $DLPACK diff --git a/src/backends/onnxruntime.c b/src/backends/onnxruntime.c index 4ad5af962..51a13ca12 100644 --- a/src/backends/onnxruntime.c +++ b/src/backends/onnxruntime.c @@ -172,7 +172,7 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long ret = RAI_TensorNew(); - DLContext ctx = (DLContext){.device_type = kDLCPU, .device_id = 0}; + DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0}; OrtTensorTypeAndShapeInfo *info; status = ort->GetTensorTypeAndShape(v, &info); @@ -243,7 +243,7 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long // TODO: use manager_ctx to ensure ORT tensor doesn't get deallocated // This applies to outputs - ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx, + ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.device = device, #ifdef RAI_COPY_RUN_OUTPUT .data = data, #else diff --git a/src/backends/tensorflow.c b/src/backends/tensorflow.c index cb42e6d25..72c4a7368 100644 --- a/src/backends/tensorflow.c +++ b/src/backends/tensorflow.c @@ -90,7 +90,7 @@ RAI_Tensor *RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset, long long batch_size) { RAI_Tensor *ret = RAI_TensorNew(); - DLContext ctx = (DLContext){.device_type = kDLCPU, .device_id = 0}; + DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0}; const size_t ndims = TF_NumDims(tensor); @@ -129,7 +129,7 @@ RAI_Tensor *RAI_TensorCreateFromTFTensor(TF_Tensor *tensor, size_t batch_offset, // This applies to outputs ret->tensor = (DLManagedTensor){ - .dl_tensor = (DLTensor){.ctx = ctx, + .dl_tensor = 
(DLTensor){.device = device, #ifdef RAI_COPY_RUN_OUTPUT .data = data, #else diff --git a/src/libtflite_c/tflite_c.cpp b/src/libtflite_c/tflite_c.cpp index 167beb508..ee0e6ffb4 100644 --- a/src/libtflite_c/tflite_c.cpp +++ b/src/libtflite_c/tflite_c.cpp @@ -48,16 +48,11 @@ static DLDataType getDLDataType(const TfLiteTensor *tensor) { return dtype; } -static DLContext getDLContext(const TfLiteTensor *tensor, const int64_t &device_id) { - DLContext ctx; - ctx.device_id = device_id; - // if (tensor->.is_cuda()) { - // ctx.device_type = DLDeviceType::kDLGPU; - // } else { - // ctx.device_type = DLDeviceType::kDLCPU; - // } - ctx.device_type = DLDeviceType::kDLCPU; - return ctx; +static DLDevice getDLDevice(const TfLiteTensor *tensor, const int64_t &device_id) { + DLDevice device; + device.device_id = device_id; + device.device_type = DLDeviceType::kDLCPU; + return device; } #if 0 @@ -139,10 +134,10 @@ DLManagedTensor *toManagedDLPack(std::shared_ptr interprete DLDataType dtype = getDLDataType(tensor); int64_t device_id = 0; - DLContext ctx = getDLContext(tensor, device_id); + DLDevice device = getDLDevice(tensor, device_id); DLTensor dl_tensor = (DLTensor){.data = new uint8_t[tensor->bytes], - .ctx = ctx, + .device = device, .ndim = output_dims->size, .dtype = dtype, .shape = new int64_t[output_dims->size], diff --git a/src/libtorch_c/torch_c.cpp b/src/libtorch_c/torch_c.cpp index 59deeb3a5..e88eb2d6b 100644 --- a/src/libtorch_c/torch_c.cpp +++ b/src/libtorch_c/torch_c.cpp @@ -64,15 +64,15 @@ static DLDataType getDLDataType(const at::Tensor &t) { return dtype; } -static DLContext getDLContext(const at::Tensor &tensor, const int64_t &device_id) { - DLContext ctx; - ctx.device_id = device_id; +static DLDevice getDLDevice(const at::Tensor &tensor, const int64_t &device_id) { + DLDevice device; + device.device_id = device_id; if (tensor.is_cuda()) { - ctx.device_type = DLDeviceType::kDLGPU; + device.device_type = DLDeviceType::kDLGPU; } else { - ctx.device_type = 
DLDeviceType::kDLCPU; + device.device_type = DLDeviceType::kDLCPU; } - return ctx; + return device; } static at::DeviceType getATenDeviceType(DLDeviceType device_type) { @@ -145,7 +145,7 @@ at::ScalarType toScalarType(const DLDataType &dtype) { } torch::Tensor fromDLPack(const DLTensor *src) { - at::DeviceType device_type = getATenDeviceType(src->ctx.device_type); + at::DeviceType device_type = getATenDeviceType(src->device.device_type); at::ScalarType stype = toScalarType(src->dtype); // torch::Device device(device_type, src->ctx.device_id); torch::Device device(device_type, -1); @@ -176,7 +176,7 @@ DLManagedTensor *toManagedDLPack(const torch::Tensor &src_) { if (src.is_cuda()) { device_id = src.get_device(); } - atDLMTensor->tensor.dl_tensor.ctx = getDLContext(src, device_id); + atDLMTensor->tensor.dl_tensor.device = getDLDevice(src, device_id); atDLMTensor->tensor.dl_tensor.ndim = src.dim(); atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src); atDLMTensor->tensor.dl_tensor.shape = const_cast(src.sizes().data()); diff --git a/src/serialization/RDB/decoder/current/v1/decode_v1.c b/src/serialization/RDB/decoder/current/v1/decode_v1.c index bfaf3e59d..0947d6aca 100644 --- a/src/serialization/RDB/decoder/current/v1/decode_v1.c +++ b/src/serialization/RDB/decoder/current/v1/decode_v1.c @@ -12,15 +12,15 @@ void *RAI_RDBLoadTensor_v1(RedisModuleIO *io) { int64_t *shape = NULL; int64_t *strides = NULL; - DLContext ctx; - ctx.device_type = RedisModule_LoadUnsigned(io); - ctx.device_id = RedisModule_LoadUnsigned(io); + DLDevice device; + device.device_type = RedisModule_LoadUnsigned(io); + device.device_id = RedisModule_LoadUnsigned(io); if (RedisModule_IsIOError(io)) goto cleanup; // For now we only support CPU tensors (except during model and script run) - assert(ctx.device_type == kDLCPU); - assert(ctx.device_id == 0); + assert(device.device_type == kDLCPU); + assert(device.device_id == 0); DLDataType dtype; dtype.bits = RedisModule_LoadUnsigned(io); @@ -49,7 
+49,7 @@ void *RAI_RDBLoadTensor_v1(RedisModuleIO *io) { goto cleanup; RAI_Tensor *ret = RAI_TensorNew(); - ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx, + ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.device = device, .data = data, .ndim = ndims, .dtype = dtype, diff --git a/src/serialization/RDB/decoder/previous/v0/decode_v0.c b/src/serialization/RDB/decoder/previous/v0/decode_v0.c index 8dfed030b..810438771 100644 --- a/src/serialization/RDB/decoder/previous/v0/decode_v0.c +++ b/src/serialization/RDB/decoder/previous/v0/decode_v0.c @@ -5,15 +5,15 @@ void *RAI_RDBLoadTensor_v0(RedisModuleIO *io) { int64_t *shape = NULL; int64_t *strides = NULL; - DLContext ctx; - ctx.device_type = RedisModule_LoadUnsigned(io); - ctx.device_id = RedisModule_LoadUnsigned(io); + DLDevice device; + device.device_type = RedisModule_LoadUnsigned(io); + device.device_id = RedisModule_LoadUnsigned(io); if (RedisModule_IsIOError(io)) goto cleanup; // For now we only support CPU tensors (except during model and script run) - assert(ctx.device_type == kDLCPU); - assert(ctx.device_id == 0); + assert(device.device_type == kDLCPU); + assert(device.device_id == 0); DLDataType dtype; dtype.bits = RedisModule_LoadUnsigned(io); @@ -42,7 +42,7 @@ void *RAI_RDBLoadTensor_v0(RedisModuleIO *io) { goto cleanup; RAI_Tensor *ret = RAI_TensorNew(); - ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx, + ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.device = device, .data = data, .ndim = ndims, .dtype = dtype, diff --git a/src/serialization/RDB/encoder/v1/encode_v1.c b/src/serialization/RDB/encoder/v1/encode_v1.c index 6e896a593..4a34e551b 100644 --- a/src/serialization/RDB/encoder/v1/encode_v1.c +++ b/src/serialization/RDB/encoder/v1/encode_v1.c @@ -5,8 +5,8 @@ void RAI_RDBSaveTensor_v1(RedisModuleIO *io, void *value) { size_t ndim = tensor->tensor.dl_tensor.ndim; - RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.ctx.device_type); - 
RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.ctx.device_id); + RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.device.device_type); + RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.device.device_id); RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.dtype.bits); RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.dtype.code); RedisModule_SaveUnsigned(io, tensor->tensor.dl_tensor.dtype.lanes); diff --git a/src/tensor.c b/src/tensor.c index 079614abc..49a02f35c 100644 --- a/src/tensor.c +++ b/src/tensor.c @@ -123,7 +123,7 @@ RAI_Tensor *RAI_TensorCreateWithDLDataType(DLDataType dtype, long long *dims, in strides[i] *= strides[i + 1] * shape[i + 1]; } - DLContext ctx = (DLContext){.device_type = kDLCPU, .device_id = 0}; + DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0}; void *data = NULL; switch (tensorAllocMode) { case TENSORALLOC_ALLOC: @@ -140,7 +140,7 @@ RAI_Tensor *RAI_TensorCreateWithDLDataType(DLDataType dtype, long long *dims, in break; } - ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx, + ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.device = device, .data = data, .ndim = ndims, .dtype = dtype, @@ -185,7 +185,7 @@ RAI_Tensor *_TensorCreateWithDLDataTypeAndRString(DLDataType dtype, size_t dtype strides[i] *= strides[i + 1] * shape[i + 1]; } - DLContext ctx = (DLContext){.device_type = kDLCPU, .device_id = 0}; + DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0}; size_t nbytes = len * dtypeSize; size_t blob_len; @@ -201,7 +201,7 @@ RAI_Tensor *_TensorCreateWithDLDataTypeAndRString(DLDataType dtype, size_t dtype RAI_HoldString(NULL, rstr); RAI_Tensor *ret = RAI_TensorNew(); - ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = ctx, + ret->tensor = (DLManagedTensor){.dl_tensor = (DLTensor){.device = device, .data = data, .ndim = ndims, .dtype = dtype, @@ -342,7 +342,7 @@ RAI_Tensor *RAI_TensorCreateFromDLTensor(DLManagedTensor *dl_tensor) { RAI_Tensor *ret = 
RAI_TensorNew(); ret->tensor = - (DLManagedTensor){.dl_tensor = (DLTensor){.ctx = dl_tensor->dl_tensor.ctx, + (DLManagedTensor){.dl_tensor = (DLTensor){.device = dl_tensor->dl_tensor.device, .data = dl_tensor->dl_tensor.data, .ndim = dl_tensor->dl_tensor.ndim, .dtype = dl_tensor->dl_tensor.dtype,