diff --git a/.gitignore b/.gitignore
index 882c8c6cd..1791006fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,7 +16,7 @@ venv*/
 *.tar.gz
 /VARIANT
 
-# Docs API reference 
+# Docs API reference
 docs/api_reference.md
 
 ### Cmake auto tools
@@ -137,4 +137,8 @@ dkms.conf
 .idea_modules/
 
 # docs site
-site/
\ No newline at end of file
+site/
+
+# docker remnants
+*.iid
+*.cid
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1926d76aa..9bb420537 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,14 +138,25 @@ ENDIF()
 #----------------------------------------------------------------------------------------------
 
 IF(BUILD_TFLITE)
-    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflow-lite
+    FIND_LIBRARY(TFLITE_LIBRARIES_1 NAMES tensorflowlite
             PATHS ${depsAbs}/libtensorflow-lite/lib)
-    FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES benchmark-lib.a
+    IF (${DEVICE} STREQUAL "gpu")
+        FIND_LIBRARY(TFLITE_LIBRARIES_2 NAMES tensorflowlite_gpu_delegate
             PATHS ${depsAbs}/libtensorflow-lite/lib)
-    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2})
-    MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\")")
+        IF (NOT APPLE)
+            FIND_LIBRARY(OPENGL_LIBRARIES NAMES GL
+                PATHS /usr/lib/${MACH}-linux-gnu)
+            FIND_LIBRARY(EGL_LIBRARIES NAMES EGL
+                PATHS /usr/lib/${MACH}-linux-gnu)
+        ELSE()
+            MESSAGE(FATAL_ERROR "The TensorFlow Lite GPU backend is not supported on Apple machines.")
+        ENDIF()
+    ENDIF()
+    SET(TFLITE_LIBRARIES ${TFLITE_LIBRARIES_1} ${TFLITE_LIBRARIES_2} ${OPENGL_LIBRARIES} ${EGL_LIBRARIES})
     IF (NOT TFLITE_LIBRARIES)
         MESSAGE(FATAL_ERROR "Could not find TensorFlow Lite")
+    ELSE()
+        MESSAGE(STATUS "Found TensorFlow Lite Libraries: \"${TFLITE_LIBRARIES}\"")
     ENDIF()
     IF (${DEVICE} STREQUAL "gpu")
         ADD_DEFINITIONS(-DRAI_TFLITE_USE_CUDA)
@@ -202,6 +213,7 @@ ENDIF()
 
 ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(tests/module)
+
 ADD_LIBRARY(redisai SHARED $<TARGET_OBJECTS:redisai_obj>)
 
 TARGET_LINK_LIBRARIES(redisai ${CMAKE_DL_LIBS})
@@ -322,4 +334,4 @@ if(PACKAGE_UNIT_TESTS)
     enable_testing()
     include(GoogleTest)
     add_subdirectory(tests/unit)
-endif()
\ No newline at end of file
+endif()
diff --git a/README.md b/README.md
index d82a4b36e..3bc27c163 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ redis-cli
 
 ## Building
 
-You should obtain the module's source code and submodule using git like so: 
+You should obtain the module's source code and submodule using git like so:
 
 ```sh
 git clone --recursive https://github.com/RedisAI/RedisAI
@@ -96,6 +96,8 @@ ALL=1 make -C opt clean build
 
 Note: in order to use the PyTorch backend on Linux, at least `gcc 4.9.2` is required.
 
+[See this document](docs/developer-backends.md) for building AI backends.
+
 ### Running the server
 
 You will need a redis-server version 5.0.7 or greater. This should be
diff --git a/docs/developer-backends.md b/docs/developer-backends.md
new file mode 100644
index 000000000..7388b9f5d
--- /dev/null
+++ b/docs/developer-backends.md
@@ -0,0 +1,19 @@
+# RedisAI dependency builds
+
+Platform dependency build systems are located in this folder. Dependencies are to be pre-built, and published to S3. To do so, they rely *(ultimately)* on running **make build publish** in a given directory. The goal is for this to be true on all target platforms (x86_64, arm64), though at this time it's only true for: tensorflowlite.
+
+## Background
+
+Items are built in docker images, for the target platform whenever possible. If needed (i.e. a future planned MacOS build) items are built on the dedicated hardware. There are design wrinkles to each build, though the ideal is to build a base docker (see the [automata repository](https://github.com/redislabsmodules/automata)). That base docker is then used as the base build system injector for the dependency itself. A docker image is built from the base docker, accepting externalized variables such as the dependency version. Compilation of external requirements takes place in a build file, mounted inside the docker image.
+
+Ideally a per-platform Docker file (i.e Dockerfile.x64, Dockerfile.arm) will exist in the underlying folder, assuming building within a docker is tenable.
+
+--------------
+
+## tensorflowlite (tflite)
+
+### arm64
+
+The arm build of tflite currently occurs on **jetson arm devices** only, as portions of the root filesystem of the Jetson device are mounted during the build. Given the symlinks that exist on the device between things in /usr/lib to /etc/alternatives, and in turn to /usr/local/cuda, which is itself a symlink to /usr/local/cuda-10.2, this is the current philosophy.
+
+The *build_arm* target in the [Makefile](Makefile) describes the process in detail. The code to build the base docker build image can be found in the [automata repository](https://github.com/RedisLabsModules/automata/tree/master/dockers/buildsystem/bazelbuilder). The *bazelbuilder* image is published to the [redisfab dockerhub repositories](https://hub.docker.com/r/redisfab/).
diff --git a/docs/developer.md b/docs/developer.md
index feb85e753..aff8e2e9b 100644
--- a/docs/developer.md
+++ b/docs/developer.md
@@ -106,7 +106,7 @@ Within the `backends` folder you will find the implementations code required to
 * **ONNX**: `onnxruntime.h` and `onnxruntime.c` exporting the functions to to register the ONNXRuntime backend
 
 ## Building and Testing
-You can compile and build the module from its source code - refer to the [Building and Running section](quickstart.md#building-and-running) of the Quickstart page for instructions on how to do that.
+You can compile and build the module from its source code - refer to the [Building and Running section](quickstart.md#building-and-running) of the Quickstart page for instructions on how to do that, or view the  detailed instructions on [building backends](developer-backends.md).
 
 **Running Tests**
 
diff --git a/get_deps.sh b/get_deps.sh
index a5d0f07e1..64d8c54b1 100755
--- a/get_deps.sh
+++ b/get_deps.sh
@@ -145,7 +145,7 @@ fi # WITH_TF
 
 ################################################################################# LIBTFLITE
 
-TFLITE_VERSION="2.0.0"
+TFLITE_VERSION="2.4.1"
 
 if [[ $WITH_TFLITE != 0 ]]; then
 	[[ $FORCE == 1 ]] && rm -rf $LIBTFLITE
@@ -156,18 +156,16 @@ if [[ $WITH_TFLITE != 0 ]]; then
 		LIBTF_URL_BASE=https://s3.amazonaws.com/redismodules/tensorflow
 		if [[ $OS == linux ]]; then
 			TFLITE_OS="linux"
-			# if [[ $GPU != 1 ]]; then
-			# 	TFLITE_BUILD="cpu"
-			# else
-			# 	TFLITE_BUILD="gpu"
-			# fi
+			if [[ $GPU != 1 ]]; then
+				TFLITE_PLATFORM="cpu"
+			else
+				TFLITE_PLATFORM="cuda"
+			fi
 
 			if [[ $ARCH == x64 ]]; then
 				TFLITE_ARCH=x86_64
 			elif [[ $ARCH == arm64v8 ]]; then
 				TFLITE_ARCH=arm64
-			elif [[ $ARCH == arm32v7 ]]; then
-				TFLITE_ARCH=arm
 			fi
 		elif [[ $OS == macos ]]; then
 			TFLITE_OS=darwin
@@ -175,7 +173,7 @@ if [[ $WITH_TFLITE != 0 ]]; then
 			TFLITE_ARCH=x86_64
 		fi
 
-		LIBTFLITE_ARCHIVE=libtensorflowlite-${TFLITE_OS}-${TFLITE_ARCH}-${TFLITE_VERSION}.tar.gz
+		LIBTFLITE_ARCHIVE=libtensorflowlite-${TFLITE_OS}-${TFLITE_PLATFORM}-${TFLITE_ARCH}-${TFLITE_VERSION}.tar.gz
 
 		[[ ! -f $LIBTFLITE_ARCHIVE || $FORCE == 1 ]] && wget -q $LIBTF_URL_BASE/$LIBTFLITE_ARCHIVE
 
diff --git a/opt/Makefile b/opt/Makefile
index 53f0e42d7..fc2436c51 100755
--- a/opt/Makefile
+++ b/opt/Makefile
@@ -135,7 +135,8 @@ CMAKE_FLAGS += \
 	-DUSE_COVERAGE=$(USE_COVERAGE) \
 	-DUSE_PROFILE=$(USE_PROFILE) \
 	-DREDISAI_GIT_SHA=\"$(GIT_SHA)\" \
-	-DDEVICE=$(DEVICE)
+	-DDEVICE=$(DEVICE) \
+	-DMACH=$(shell uname -m)
 
 ifeq ($(WITH_TF),0)
 CMAKE_FLAGS += -DBUILD_TF=off
diff --git a/opt/build/tflite/Dockerfile.x64 b/opt/build/tflite/Dockerfile.x64
old mode 100755
new mode 100644
index 53a474c69..89ff73a10
--- a/opt/build/tflite/Dockerfile.x64
+++ b/opt/build/tflite/Dockerfile.x64
@@ -1,29 +1,34 @@
+ARG BAZEL_VERSION=3.1.0
+ARG TFLITE_ARCH=x86_64
 
-ARG OS=debian:buster
+ARG OS=redisfab/ubuntu1804-${TFLITE_ARCH}-bazel${BAZEL_VERSION}
 
-ARG FTLITE_VER=2.0.0
+# cuda | cpu
+ARG REDISAI_PLATFORM=cuda
+
+ARG TFLITE_VERSION=2.4.0
 
 #----------------------------------------------------------------------------------------------
 FROM ${OS}
 
 ARG FTLITE_VER
-
-WORKDIR /build
-
-RUN set -e ;\
-	apt-get -qq update ;\
-	apt-get -q install -y git ca-certificates curl wget unzip python3 ;\
-	apt-get -q install -y git build-essential zlib1g-dev
-
-RUN git clone --single-branch --branch v${FTLITE_VER} --depth 1 https://github.com/tensorflow/tensorflow.git
-
-ADD ./opt/build/tflite/build /build/
-ADD ./opt/readies/ /build/readies/
-ADD ./opt/build/tflite/collect.py /build/
-
-RUN set -e ;\
-	cd tensorflow/tensorflow/lite/tools/make ;\
-	./download_dependencies.sh ;\
-	./build_lib.sh
-
-RUN ./collect.py --version ${FTLITE_VER} --dest /build/dest
+ARG TFLITE_VERSION
+ARG TFLITE_ARCH
+ARG REDISAI_PLATFORM
+
+ADD ./opt/build/tflite /tflite
+
+RUN apt-get -qq update && apt-get install -yqq python3
+RUN apt-get install -qqy git \
+            unzip \
+            wget \
+            curl \
+            build-essential \
+            zlib1g-dev \
+            libegl1-mesa-dev \
+            libgles2-mesa-dev \
+            python3-distutils \
+            python3-numpy
+RUN ln -s /usr/bin/python3 /usr/bin/python
+WORKDIR /tflite
+RUN bash build.sh ${TFLITE_ARCH} ${TFLITE_VERSION} ${REDISAI_PLATFORM}
diff --git a/opt/build/tflite/Makefile b/opt/build/tflite/Makefile
index 8962177b7..704906916 100755
--- a/opt/build/tflite/Makefile
+++ b/opt/build/tflite/Makefile
@@ -1,19 +1,34 @@
-
 ROOT=../../..
 
-export VERSION ?= 2.0.0
-OSNICK ?= buster
+export VERSION ?= 2.4.0
 
 #----------------------------------------------------------------------------------------------
 
 S3_URL=redismodules/tensorflow
 
 OS:=$(shell $(ROOT)/opt/readies/bin/platform --os)
+ARCH:=$(shell $(ROOT)/opt/readies/bin/platform --arch)
+
+# cuda | cpu
+REDISAI_PLATFORM=cuda
 
+# non-arm linux
 ifeq ($(OS),linux)
+ifeq ($(ARCH),x64)
 OS.publish:=$(OS)
-ARCH.publish:=$(ARCH)
+ARCH.publish:=x86_64
 
+BUILD_TARGETS:=build_x64
+PUBLISH_TARGETS:=publish_x64
+else ifeq ($(ARCH),arm64v8)
+BUILD_TARGETS:=build_arm
+PUBLISH_TARGETS:=publish_arm
+else ifeq ($(ARCH),)
+BUILD_TARGETS:=
+PUBLISH_TARGETS:=
+endif
+
+# mac
 else ifeq ($(OS),macos)
 OS.publish:=darwin
 ARCH.publish:=x86_64
@@ -21,119 +36,55 @@ endif
 
 STEM=libtensorflowlite-$(OS.publish)
 
-DOCKER_OS.bionic=ubuntu:bionic
-DOCKER_OS.stretch=debian:stretch-slim
-DOCKER_OS.buster=debian:buster-slim
-DOCKER_OS=$(DOCKER_OS.$(OSNICK))
- 
-#----------------------------------------------------------------------------------------------
-
-ifeq ($(OS),linux)
-
-define targets # (1=OP, 2=op)
-$(1)_TARGETS :=
-$(1)_TARGETS += $(if $(findstring $(X64),1),$(2)_x64)
-$(1)_TARGETS += $(if $(findstring $(ARM7),1),$(2)_arm32v7)
-$(1)_TARGETS += $(if $(findstring $(ARM8),1),$(2)_arm64v8)
-
-$(1)_TARGETS += $$(if $$(strip $$($(1)_TARGETS)),,$(2)_x64 $(2)_arm32v7 $(2)_arm64v8)
-endef
-
-else ifeq ($(OS),macos)
-
-define targets # (1=OP, 2=op)
-$(1)_TARGETS := $(2)_x64
-endef
-
-endif
-
-$(eval $(call targets,BUILD,build))
-$(eval $(call targets,PUBLISH,publish))
-
-#----------------------------------------------------------------------------------------------
-
-define build_x64 # (1=arch, 2=tar-arch)
-IID_$(1)=$(1)_$(VERSION).iid
-CID_$(1)=$(1)_$(VERSION).cid
+# x86 linux ------------------------------------------------------------------------------------------
 
+#IID_X64=x86_64_$(VERSION).iid
+#CID_X64=x86_64_$(VERSION).cid
+#
 build_x64:
-	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.x64 \
-		--build-arg OS=$(DOCKER_OS) $(ROOT)
-	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
-	@docker cp `cat $$(CID_$(1))`:/build/dest/$(STEM)-$(2)-$(VERSION).tar.gz .
-
+	-@rm *.iid *.cid
+	@docker build --iidfile x64.iid -t redisfab/$(STEM)-x86_64:$(VERSION) -f Dockerfile.x64 \
+		--build-arg TFLITE_VERSION=$(VERSION) --build-arg REDISAI_PLATFORM=$(REDISAI_PLATFORM) $(ROOT)
+	@docker create --cidfile x64.cid `cat x64.iid`
+	@docker cp `cat x64.cid`:/tflite/tensorflow-$(VERSION)/tmp/libtensorflowlite-linux-$(REDISAI_PLATFORM)-x86_64-$(VERSION).tar.gz .
 .PHONY: build_x64
-endef
-
-define build_arm # (1=arch, 2=tar-arch)
-IID_$(1)=$(1)_$(VERSION).iid
-CID_$(1)=$(1)_$(VERSION).cid
 
-build_$(1):
-	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.arm \
-		--build-arg ARCH=$(1) $(ROOT)
-	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
-	@docker cp `cat $$(CID_$(1))`:/build/$(STEM)-$(2)-$(VERSION).tar.gz .
+publish_x64:
+	@aws s3 cp $(STEM)-$(REDISAI_PLATFORM)-x86_64-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+.PHONY: publish_x64
 
-.PHONY: build_$(1)
-endef
+# arm linux ------------------------------------------------------------------------------------------
 
-#----------------------------------------------------------------------------------------------
+ifeq ($(ARCH),arm64v8)
+ARCH.publish:=arm64
+DOCKERBASE=redisfab/ubuntu1804-$(ARCH.publish)-bazel3.1.0-jetson:latest
+endif
 
-define publish_x64 # (1=arch, 2=tar-arch)
-publish_x64:
-	@aws s3 cp $(STEM)-$(2)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+build_arm:
+	@cd ../../../ ; docker run -v `pwd`/opt/build/tflite:/tflite -v /etc/alternatives:/etc/alternatives -v /usr/lib:/usr/lib -v /usr/local:/usr/local -v /usr/include:/usr/include ${DOCKERBASE} /tflite/build.sh $(ARCH.publish) $(VERSION) $(REDISAI_PLATFORM)
 
-.PHONY: publish_x64
-endef
+.PHONY: build_arm
 
-define publish_arm # (1=arch, 2=tar-arch)
-publish_$(1):
-	@aws s3 cp $(STEM)-$(2)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+publish_arm:
+	@aws s3 cp `pwd`/tensorflow-$(VERSION)/tmp/$(STEM)$(OS)-$(REDISAI_PLATFORM)-$(ARCH.publish)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
 
-.PHONY: publish_$(1)
-endef
+.PHONY: publish_arm
 
 #----------------------------------------------------------------------------------------------
 
 all: build publish
 
-ifeq ($(OS),linux)
-
-build: $(BUILD_TARGETS)
-
-$(eval $(call build_x64,x64,x86_64))
-$(eval $(call build_arm,arm64v8,arm64))
-$(eval $(call build_arm,arm32v7,arm))
-
 ifneq ($(filter publish,$(MAKECMDGOALS)),)
 ifeq ($(wildcard $(HOME)/.aws/credentials),)
 $(error Please run 'aws configure' and provide it with access credentials)
 endif
 endif
 
-publish: $(PUBLISH_TARGETS)
-
-$(eval $(call publish_x64,x64,x86_64))
-$(eval $(call publish_arm,arm64v8,arm64))
-$(eval $(call publish_arm,arm32v7,arm))
-
-help:
-	@echo "make [build|publish] [X64=1|ARM7=1|ARM8=1]"
-
-else ifeq ($(OS),macos)
-
-build:
-	@VERSION=$(VERSION) ./build.macos
-	@mv macos/dest/$(STEM)-$(ARCH.publish)-$(VERSION).tar.gz .
-
-publish: $(PUBLISH_TARGETS)
-
-$(eval $(call publish_x64,x64,x86_64))
-
 help:
 	@echo "make [build|publish]"
 
-endif # macos
+build: $(BUILD_TARGETS)
+publish: $(PUBLISH_TARGETS)
 
 .PHONY: all build publish help
+
diff --git a/opt/build/tflite/build b/opt/build/tflite/build
deleted file mode 100755
index ba9a8cb71..000000000
--- a/opt/build/tflite/build
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-OS=$(python3 readies/bin/platform --os)
-ARCH=$(python3 readies/bin/platform --arch)
-
-cd tensorflow/tensorflow/lite/tools/make
-bash download_dependencies.sh
-if [[ $OS == linux ]]; then
-	TARGET=linux
-	if [[ $ARCH == x64 ]]; then
-		bash build_lib.sh
-	elif [[ $ARCH == arm64v8 ]]; then
-		bash build_aarch64_lib.sh
-	elif [[ $ARCH == arm32v7 ]]; then
-		bash build_rpi_lib.sh
-	fi
-elif [[ $OS == macos ]]; then
-	TARGET=osx
-	bash build_lib.sh
-fi
diff --git a/opt/build/tflite/build.macos b/opt/build/tflite/build.macos
deleted file mode 100755
index 316d38e0f..000000000
--- a/opt/build/tflite/build.macos
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
-cd $HERE
-
-set -e
-mkdir -p macos
-cd macos
-cp ../collect.py .
-ln -s ../../../readies/
-git clone --single-branch --branch v${VERSION} --depth 1 https://github.com/tensorflow/tensorflow.git
-./collect.py --version $VERSION --dest dest
diff --git a/opt/build/tflite/build.sh b/opt/build/tflite/build.sh
new file mode 100755
index 000000000..e55c44178
--- /dev/null
+++ b/opt/build/tflite/build.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+set -e
+set -x
+
+cd `dirname ${BASH_SOURCE[0]}`
+ARCH=$1
+
+VERSION=$2
+if [ "X$VERSION" == "X" ]; then
+    VERSION=2.4.0
+fi
+
+# cuda | cpu
+REDISAI_TARGET=$3
+if [ "X$REDISAI_TARGET" == "X" ]; then
+    REDISAI_TARGET="cuda"
+fi
+
+BASEOS=$4
+if [ "X$BASEOS" == "X" ]; then
+    BASEOS=linux
+fi
+
+if [ ! -f v$VERSION.tar.gz ]; then
+    wget -q https://github.com/tensorflow/tensorflow/archive/v$VERSION.tar.gz
+    tar -xzf v$VERSION.tar.gz
+fi
+cd tensorflow-$VERSION
+
+# fetch dependencies
+./tensorflow/lite/tools/make/download_dependencies.sh
+
+# build tensorflow lite library
+if [ "X$REDISAI_TARGET" == "Xcuda" ]; then
+    BAZEL_VARIANT="--config=cuda"
+fi
+bazel build --jobs $(nproc) --config=monolithic ${BAZEL_VARIANT} //tensorflow/lite:libtensorflowlite.so
+# bazel build --jobs ${nproc} --config=monolithic --config=cuda //tensorflow/lite:libtensorflowlite.so
+
+TMP_LIB="tmp"
+# flatbuffer header files
+mkdir -p $TMP_LIB/include
+cp -r tensorflow/lite/tools/make/downloads/flatbuffers/include/flatbuffers $TMP_LIB/include/
+# tensorflow lite header files
+TFLITE_DIR="tensorflow/lite"
+declare -a tfLiteDirectories=(
+    "$TFLITE_DIR"
+    "$TFLITE_DIR/c"
+    "$TFLITE_DIR/core"
+    "$TFLITE_DIR/core/api"
+    "$TFLITE_DIR/delegates/gpu"
+    "$TFLITE_DIR/delegates/nnapi"
+    "$TFLITE_DIR/delegates/xnnpack"
+    "$TFLITE_DIR/experimental/resource"
+    "$TFLITE_DIR/kernels"
+    "$TFLITE_DIR/nnapi"
+    "$TFLITE_DIR/schema"
+    "$TFLITE_DIR/tools/evaluation"
+)
+for dir in "${tfLiteDirectories[@]}"
+do
+    mkdir -p $TMP_LIB/include/$dir
+    cp $dir/*h $TMP_LIB/include/$dir
+done
+mkdir -p $TMP_LIB/lib
+cp bazel-bin/tensorflow/lite/libtensorflowlite.so $TMP_LIB/lib
+bazel build -c opt --copt -DMESA_EGL_NO_X10_HEADERS --copt -DEGL_NO_X11 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_delegate.so
+cp bazel-bin/tensorflow/lite/delegates/gpu/libtensorflowlite_gpu_delegate.so $TMP_LIB/lib
+
+# NOTE(review): removed redundant rebuild of //tensorflow/lite:libtensorflowlite.so — both artifacts were already built and copied into $TMP_LIB above
+# create .tar.gz file
+cd $TMP_LIB
+tar -cvzf libtensorflowlite-${BASEOS}-${REDISAI_TARGET}-${ARCH}-${VERSION}.tar.gz include lib
+
diff --git a/opt/build/tflite/collect.py b/opt/build/tflite/collect.py
deleted file mode 100755
index 4d4e4b347..000000000
--- a/opt/build/tflite/collect.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import argparse
-from pathlib import Path
-import shutil
-import tarfile
-
-# this refers to deps directory inside a container
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), "readies"))
-import paella
-
-#----------------------------------------------------------------------------------------------
-
-TFLITE_VERSION = '2.0.0'
-
-parser = argparse.ArgumentParser(description='Prepare RedisAI dependant distribution packages.')
-parser.add_argument('--tensorflow', default='tensorflow', help='root of tensorflow repository')
-parser.add_argument('--version', default=TFLITE_VERSION, help='tensorflow version')
-parser.add_argument('--dest', default='dest', help='destination directory')
-parser.add_argument('-n', '--nop', action="store_true", help='no operation')
-args = parser.parse_args()
-
-#----------------------------------------------------------------------------------------------
-
-tensorflow = Path(args.tensorflow).resolve()
-dest = Path(args.dest).resolve()
-
-#----------------------------------------------------------------------------------------------
-
-platform = paella.Platform()
-
-tf_os = platform.os
-tf_os_internal = tf_os
-if tf_os == 'macos':
-    tf_os = 'darwin'
-    tf_os_internal = 'osx'
-
-tf_arch = platform.arch
-if tf_arch == 'x64':
-    tf_arch = 'x86_64'
-elif tf_arch == 'arm64v8':
-    tf_arch = 'arm64'
-
-tf_ver = args.version
-
-#----------------------------------------------------------------------------------------------
-
-def copy_p(src, dest):
-    f = dest/src
-    paella.mkdir_p(os.path.dirname(f))
-    shutil.copy(src, f, follow_symlinks=False)
-
-def create_tar(name, basedir, dir='.'):
-    def reset_uid(tarinfo):
-        tarinfo.uid = tarinfo.gid = 0
-        tarinfo.uname = tarinfo.gname = "root"
-        return tarinfo
-    with cwd(basedir):
-        with tarfile.open(name, 'w:gz') as tar:
-            tar.add(dir, filter=reset_uid)
-
-def collect_tflite():
-    d_tensorflow = dest
-    with cwd(tensorflow):
-        for f in Path('tensorflow/lite').glob('**/*.h'):
-            copy_p(f, d_tensorflow/'include')
-        with cwd('tensorflow/lite/tools/make'):
-            with cwd('downloads/flatbuffers/include'):
-                for f in Path('.').glob('**/*.h'):
-                    copy_p(f, d_tensorflow/'include')
-            with cwd(f'gen/{tf_os_internal}_{tf_arch}/lib'):
-                for f in Path('.').glob('*.a'):
-                    copy_p(f, d_tensorflow/'lib')
-    create_tar(dest/f'libtensorflowlite-{tf_os}-{tf_arch}-{tf_ver}.tar.gz', dest)
-
-#----------------------------------------------------------------------------------------------
-
-collect_tflite()
diff --git a/opt/system-setup.py b/opt/system-setup.py
index 07fdd50eb..24dd16c42 100755
--- a/opt/system-setup.py
+++ b/opt/system-setup.py
@@ -30,7 +30,8 @@ def debian_compat(self):
         self.install("gawk")
         self.install("libssl-dev")
         self.install("python3-regex")
-        self.install("python3-networkx python3-numpy")
+        self.install("python3-psutil python3-networkx python3-numpy")
+        self.install("libegl1-mesa-dev libgles2-mesa-dev")
         if self.platform.is_arm():
             self.install("python3-dev") # python3-skimage
         self.install("libmpich-dev libopenblas-dev") # for libtorch
diff --git a/src/backends/libtflite_c/tflite_c.cpp b/src/backends/libtflite_c/tflite_c.cpp
index c0b75bc19..eb26fb2f9 100644
--- a/src/backends/libtflite_c/tflite_c.cpp
+++ b/src/backends/libtflite_c/tflite_c.cpp
@@ -5,6 +5,8 @@
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/delegates/gpu/delegate.h"
 #include "tensorflow/lite/tools/evaluation/utils.h"
 
 namespace {
@@ -204,6 +206,9 @@ struct ModelContext {
     std::string buffer;
     DLDeviceType device;
     int64_t device_id;
+#if RAI_TFLITE_USE_CUDA
+    TfLiteDelegate *delegate;
+#endif
 };
 
 } // namespace
@@ -230,16 +235,16 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
         return NULL;
     }
 
-#if RAI_TFLITE_USE_CUDA
-    if (device == DLDeviceType::kDLGPU) {
-        tflite::Interpreter::TfLiteDelegatePtr delegate =
-            tflite::evaluation::CreateGPUDelegate(model.get());
-        if (interpreter_->ModifyGraphWithDelegate(std::move(delegate)) != kTfLiteOk) {
-            _setError("Failed to set GPU delegate", error);
-            return NULL;
-        }
-    }
-#endif
+// #if RAI_TFLITE_USE_CUDA
+//     if (device == DLDeviceType::kDLGPU) {
+//         tflite::Interpreter::TfLiteDelegatePtr delegate =
+//             tflite::evaluation::CreateGPUDelegate(model.get());
+//         if (interpreter_->ModifyGraphWithDelegate(std::move(delegate)) != kTfLiteOk) {
+//             _setError("Failed to set GPU delegate", error);
+//             return NULL;
+//         }
+//     }
+// #endif
 
     if (interpreter_->AllocateTensors() != kTfLiteOk) {
         _setError("Failed to allocate tensors", error);
@@ -254,7 +259,9 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
     ctx->model = std::move(model);
     ctx->interpreter = std::move(interpreter);
     ctx->buffer = std::move(graphstr);
-
+#if RAI_TFLITE_USE_CUDA
+    ctx->delegate = nullptr;
+#endif
     return ctx;
 }
 
@@ -342,6 +349,19 @@ extern "C" void tfliteRunModel(void *ctx, long n_inputs, DLManagedTensor **input
         return;
     }
 
+#if RAI_TFLITE_USE_CUDA
+    if (ctx_->device == DLDeviceType::kDLGPU) {
+      if (!ctx_->delegate) {
+        auto* delegate = TfLiteGpuDelegateV2Create(/*default options=*/nullptr);
+        if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+          _setError("Failed to set GPU delegate", error);
+          return;
+        }
+	ctx_->delegate = delegate;
+      }
+   }
+#endif
+
     try {
         for (size_t i = 0; i < tflite_outputs.size(); i++) {
             outputs[i] = toManagedDLPack(interpreter, tflite_outputs[i]);
@@ -358,7 +378,14 @@ extern "C" void tfliteSerializeModel(void *ctx, char **buffer, size_t *len, char
 
 extern "C" void tfliteDeallocContext(void *ctx) {
     ModelContext *ctx_ = (ModelContext *)ctx;
+#if RAI_TFLITE_USE_CUDA
+    if (ctx_->device == DLDeviceType::kDLGPU) {
+      if (ctx_->delegate) {
+        TfLiteGpuDelegateV2Delete(ctx_->delegate);
+      }
+   }
+#endif
     if (ctx_) {
-        delete ctx_;
+        //delete ctx_;
     }
 }
diff --git a/tests/flow/tests_tflite.py b/tests/flow/tests_tflite.py
index a10eda36c..49c008630 100644
--- a/tests/flow/tests_tflite.py
+++ b/tests/flow/tests_tflite.py
@@ -16,14 +16,14 @@ def test_run_tflite_model(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.MODELGET', 'm{1}', 'META')
     env.assertEqual(len(ret), 14)
     env.assertEqual(ret[5], b'')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'TAG', 'asdf', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'TAG', 'asdf', 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.MODELGET', 'm{1}', 'META')
@@ -40,7 +40,7 @@ def test_run_tflite_model(env):
     # TODO: enable me. CI is having issues on GPU asserts of TFLITE and CPU
     if DEVICE == "CPU":
         env.assertEqual(ret[1], b'TFLITE')
-        env.assertEqual(ret[3], b'CPU')
+        env.assertEqual(ret[3], bytes(DEVICE, "utf-8"))
 
     con.execute_command('AI.MODELEXECUTE', 'm{1}', 'INPUTS', 1, 'a{1}', 'OUTPUTS', 2, 'b{1}', 'c{1}')
     values = con.execute_command('AI.TENSORGET', 'b{1}', 'VALUES')
@@ -58,17 +58,17 @@ def test_run_tflite_model_errors(env):
     sample_raw = load_file_content('one.raw')
     wrong_model_pb = load_file_content('graph.pb')
 
-    ret = con.execute_command('AI.MODELSTORE', 'm_2{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'm_2{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     check_error_message(env, con, "Failed to load model from buffer",
-                        'AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU', 'TAG', 'asdf', 'BLOB', wrong_model_pb)
+                        'AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE, 'TAG', 'asdf', 'BLOB', wrong_model_pb)
 
     # TODO: Autobatch is tricky with TFLITE because TFLITE expects a fixed batch
     #       size. At least we should constrain MINBATCHSIZE according to the
     #       hard-coded dims in the tflite model.
     check_error_message(env, con, "Auto-batching not supported by the TFLITE backend",
-                        'AI.MODELSTORE', 'm{1}', 'TFLITE', 'CPU',
+                        'AI.MODELSTORE', 'm{1}', 'TFLITE', DEVICE,
                         'BATCHSIZE', 2, 'MINBATCHSIZE', 2, 'BLOB', model_pb)
 
     ret = con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -96,7 +96,7 @@ def test_tflite_modelinfo(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -143,7 +143,7 @@ def test_tflite_modelrun_disconnect(env):
     model_pb = load_file_content('mnist_model_quant.tflite')
     sample_raw = load_file_content('one.raw')
 
-    ret = red.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = red.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     ret = red.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
@@ -164,7 +164,7 @@ def test_tflite_model_rdb_save_load(env):
     con = env.getConnection()
     model_pb = load_file_content('mnist_model_quant.tflite')
 
-    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    ret = con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
     env.assertEqual(ret, b'OK')
 
     model_serialized_memory = con.execute_command('AI.MODELGET', 'mnist{1}', 'BLOB')
@@ -196,7 +196,7 @@ def test_tflite_info(env):
 
     model_pb = load_file_content('mnist_model_quant.tflite')
 
-    con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', 'CPU', 'BLOB', model_pb)
+    con.execute_command('AI.MODELSTORE', 'mnist{1}', 'TFLITE', DEVICE, 'BLOB', model_pb)
 
     ret = con.execute_command('AI.INFO')
     env.assertEqual(8, len(ret))