diff --git a/python/AIEMLIRModule.cpp b/python/AIEMLIRModule.cpp
index 8f27cd227a5..51c8f0441b7 100644
--- a/python/AIEMLIRModule.cpp
+++ b/python/AIEMLIRModule.cpp
@@ -15,11 +15,10 @@
 #include "mlir-c/IR.h"
 #include "mlir-c/Support.h"
 #include "mlir/Bindings/Python/Diagnostics.h"
-#include "mlir/Bindings/Python/PybindAdaptors.h"
+#include "mlir/Bindings/Python/NanobindAdaptors.h"
+#include "llvm/ADT/Twine.h"
 
-#include <pybind11/cast.h>
-#include <pybind11/detail/common.h>
-#include <pybind11/pybind11.h>
+#include <nanobind/nanobind.h>
 
 #include <cstdlib>
 #include <stdexcept>
@@ -27,11 +26,11 @@
 #include <unicodeobject.h>
 #include <vector>
 
-using namespace mlir::python::adaptors;
-namespace py = pybind11;
-using namespace py::literals;
+using namespace mlir::python;
+namespace nb = nanobind;
+using namespace nb::literals;
 
-PYBIND11_MODULE(_aie, m) {
+NB_MODULE(_aie, m) {
 
   aieRegisterAllPasses();
 
@@ -48,33 +47,35 @@ PYBIND11_MODULE(_aie, m) {
       "registry"_a);
 
   // AIE types bindings
-  mlir_type_subclass(m, "ObjectFifoType", aieTypeIsObjectFifoType)
+  nanobind_adaptors::mlir_type_subclass(m, "ObjectFifoType",
+                                        aieTypeIsObjectFifoType)
       .def_classmethod(
           "get",
-          [](const py::object &cls, const MlirType type) {
+          [](const nb::object &cls, const MlirType type) {
             return cls(aieObjectFifoTypeGet(type));
           },
           "Get an instance of ObjectFifoType with given element type.",
-          "self"_a, "type"_a = py::none());
+          "self"_a, "type"_a = nb::none());
 
-  mlir_type_subclass(m, "ObjectFifoSubviewType", aieTypeIsObjectFifoSubviewType)
+  nanobind_adaptors::mlir_type_subclass(m, "ObjectFifoSubviewType",
+                                        aieTypeIsObjectFifoSubviewType)
       .def_classmethod(
           "get",
-          [](const py::object &cls, const MlirType type) {
+          [](const nb::object &cls, const MlirType type) {
             return cls(aieObjectFifoSubviewTypeGet(type));
           },
           "Get an instance of ObjectFifoSubviewType with given element type.",
-          "self"_a, "type"_a = py::none());
+          "self"_a, "type"_a = nb::none());
 
   auto stealCStr = [](MlirStringRef mlirString) {
     if (!mlirString.data || mlirString.length == 0)
       throw std::runtime_error("couldn't translate");
     std::string cpp(mlirString.data, mlirString.length);
     free((void *)mlirString.data);
-    py::handle pyS = PyUnicode_DecodeLatin1(cpp.data(), cpp.length(), nullptr);
+    nb::handle pyS = PyUnicode_DecodeLatin1(cpp.data(), cpp.length(), nullptr);
     if (!pyS)
-      throw py::error_already_set();
-    return py::reinterpret_steal<py::str>(pyS);
+      throw nb::python_error();
+    return nb::steal<nb::str>(pyS);
   };
 
   m.def(
@@ -101,8 +102,11 @@ PYBIND11_MODULE(_aie, m) {
         if (mlirLogicalResultIsFailure(aieTranslateToCDODirect(
                 op, {workDirPath.data(), workDirPath.size()}, bigendian,
                 emitUnified, cdoDebug, aieSim, xaieDebug, enableCores)))
-          throw py::value_error("Failed to generate cdo because: " +
-                                scope.takeMessage());
+          throw nb::value_error(
+              (llvm::Twine("Failed to generate cdo because: ") +
+               llvm::Twine(scope.takeMessage()))
+                  .str()
+                  .c_str());
       },
       "module"_a, "work_dir_path"_a, "bigendian"_a = false,
       "emit_unified"_a = false, "cdo_debug"_a = false, "aiesim"_a = false,
@@ -110,9 +114,9 @@ PYBIND11_MODULE(_aie, m) {
 
   m.def(
       "transaction_binary_to_mlir",
-      [](MlirContext ctx, py::bytes bytes) {
-        std::string s = bytes;
-        MlirStringRef bin = {s.data(), s.size()};
+      [](MlirContext ctx, nb::bytes bytes) {
+        MlirStringRef bin = {static_cast<const char *>(bytes.data()),
+                             bytes.size()};
         return aieTranslateBinaryToTxn(ctx, bin);
       },
       "ctx"_a, "binary"_a);
@@ -120,9 +124,9 @@ PYBIND11_MODULE(_aie, m) {
   m.def(
       "npu_instgen",
       [&stealCStr](MlirOperation op) {
-        py::str npuInstructions = stealCStr(aieTranslateToNPU(op));
+        nb::str npuInstructions = stealCStr(aieTranslateToNPU(op));
         auto individualInstructions =
-            npuInstructions.attr("split")().cast<py::list>();
+            nb::cast<nb::list>(npuInstructions.attr("split")());
         for (size_t i = 0; i < individualInstructions.size(); ++i)
           individualInstructions[i] = individualInstructions[i].attr("strip")();
         return individualInstructions;
@@ -132,10 +136,10 @@ PYBIND11_MODULE(_aie, m) {
   m.def(
       "generate_control_packets",
       [&stealCStr](MlirOperation op) {
-        py::str ctrlPackets =
+        nb::str ctrlPackets =
             stealCStr(aieTranslateControlPacketsToUI32Vec(op));
         auto individualInstructions =
-            ctrlPackets.attr("split")().cast<py::list>();
+            nb::cast<nb::list>(ctrlPackets.attr("split")());
         for (size_t i = 0; i < individualInstructions.size(); ++i)
           individualInstructions[i] = individualInstructions[i].attr("strip")();
         return individualInstructions;
@@ -171,7 +175,7 @@ PYBIND11_MODULE(_aie, m) {
   m.def("get_target_model",
         [](uint32_t d) -> PyAieTargetModel { return aieGetTargetModel(d); });
 
-  py::class_<PyAieTargetModel>(m, "AIETargetModel", py::module_local())
+  nb::class_<PyAieTargetModel>(m, "AIETargetModel")
       .def(
           "columns",
           [](PyAieTargetModel &self) {
diff --git a/python/AIERTModule.cpp b/python/AIERTModule.cpp
index 0f627a6ad4c..2aecbb5d0af 100644
--- a/python/AIERTModule.cpp
+++ b/python/AIERTModule.cpp
@@ -13,13 +13,12 @@
 
 #include "aie/Bindings/PyTypes.h"
 
-#include <pybind11/pybind11.h>
-#include <pybind11/pytypes.h>
+#include <nanobind/nanobind.h>
 
 #include <algorithm>
 
-namespace py = pybind11;
-using namespace py::literals;
+namespace nb = nanobind;
+using namespace nb::literals;
 
 class PyAIERTControl {
 public:
@@ -31,10 +30,10 @@ class PyAIERTControl {
   AieRtControl ctl;
 };
 
-PYBIND11_MODULE(_aiert, m) {
+NB_MODULE(_aiert, m) {
 
-  py::class_<PyAIERTControl>(m, "AIERTControl", py::module_local())
-      .def(py::init<PyAieTargetModel>(), "target_model"_a)
+  nb::class_<PyAIERTControl>(m, "AIERTControl")
+      .def(nb::init<PyAieTargetModel>(), "target_model"_a)
       .def("start_transaction",
            [](PyAIERTControl &self) { aieRtStartTransaction(self.ctl); })
       .def("export_serialized_transaction",
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1df5584816a..9b737f10ef6 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -191,6 +191,9 @@ if (AIE_ENABLE_PYTHON_PASSES)
 
     PRIVATE_LINK_LIBS
       ${_py_libs}
+
+    PYTHON_BINDINGS_LIBRARY
+      nanobind
   )
   target_include_directories(
     AIEPythonExtensions.MLIR
@@ -282,6 +285,8 @@ else ()
       AIECAPI
     PRIVATE_LINK_LIBS
       LLVMSupport
+    PYTHON_BINDINGS_LIBRARY
+      nanobind
   )
 
   if(AIE_ENABLE_XRT_PYTHON_BINDINGS)
@@ -298,6 +303,8 @@ else ()
         LLVMSupport
         xrt_coreutil
         uuid
+      PYTHON_BINDINGS_LIBRARY
+        nanobind
     )
     target_include_directories(AIEPythonExtensions.XRT INTERFACE ${XRT_INCLUDE_DIR})
     target_link_directories(AIEPythonExtensions.XRT INTERFACE ${XRT_LIB_DIR})
@@ -314,6 +321,9 @@ else ()
 
     PRIVATE_LINK_LIBS
       LLVMSupport
+
+    PYTHON_BINDINGS_LIBRARY
+      nanobind
   )
 
   add_mlir_python_common_capi_library(AIEAggregateCAPI
diff --git a/python/XRTModule.cpp b/python/XRTModule.cpp
index fb200f66502..9f9f49759a3 100644
--- a/python/XRTModule.cpp
+++ b/python/XRTModule.cpp
@@ -13,17 +13,19 @@
 #include "xrt/xrt_device.h"
 #include "xrt/xrt_kernel.h"
 
-#include <pybind11/numpy.h>
-#include <pybind11/pybind11.h>
-#include <pybind11/pytypes.h>
-#include <pybind11/stl.h>
+#include <nanobind/nanobind.h>
+#include <nanobind/ndarray.h>
+#include <nanobind/stl/string.h>
+#include <nanobind/stl/vector.h>
 
 #include <algorithm>
+#include <numeric>
+#include <optional>
 #include <string>
 #include <vector>
 
-namespace py = pybind11;
-using namespace py::literals;
+namespace nb = nanobind;
+using namespace nb::literals;
 
 // group_id 0 is for npu instructions
 // group_id 1 is for number of npu instructions
@@ -55,16 +57,16 @@ class PyXCLBin {
   }
 
   template <typename ElementT>
-  std::vector<py::memoryview>
-  mmapBuffers(std::vector<std::vector<int>> shapes) {
+  std::vector<nb::ndarray<>>
+  mmapBuffers(std::vector<std::vector<size_t>> shapes) {
     this->buffers.reserve(shapes.size());
-    std::vector<py::memoryview> views;
+    std::vector<nb::ndarray<>> views;
     views.reserve(shapes.size());
 
     auto initAndViewBuffer = [this](
-                                 std::vector<int> shape, int groupId,
+                                 std::vector<size_t> shape, int groupId,
                                  std::vector<std::unique_ptr<xrt::bo>> &buffers,
-                                 std::vector<py::memoryview> &views) {
+                                 std::vector<nb::ndarray<>> &views) {
       int nElements =
           std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
       int nBytes = nElements * sizeof(ElementT);
@@ -79,12 +81,13 @@ class PyXCLBin {
       std::vector strides_{1};
       for (int i = shape.size() - 1; i > 0; i--)
         strides_.push_back(strides_.back() * shape[i]);
-      std::vector<int> strides;
-      // stride in bytes
-      std::transform(strides_.rbegin(), strides_.rend(),
-                     std::back_inserter(strides),
-                     [](int s) { return s * sizeof(ElementT); });
-      views.push_back(py::memoryview::from_buffer(buf, shape, strides));
+      // strides_ is innermost-first; reverse it. nanobind follows DLPack and
+      // counts strides in elements (py::memoryview::from_buffer took bytes),
+      // so the element counts are passed through without sizeof(ElementT).
+      std::vector<int64_t> strides(strides_.rbegin(), strides_.rend());
+      // nb::handle() is passed as the owner argument, i.e. no owner object.
+      views.push_back(nb::ndarray(buf, shape.size(), shape.data(), nb::handle(),
+                                  strides.data()));
     };
 
     for (size_t i = 0; i < shapes.size(); ++i)
@@ -140,22 +143,22 @@ class PyXCLBin {
   std::unique_ptr<xrt::run> run_;
 };
 
-PYBIND11_MODULE(_xrt, m) {
+NB_MODULE(_xrt, m) {
 
-  py::class_<PyXCLBin>(m, "XCLBin", py::module_local())
-      .def(py::init<const std::string &, const std::string &, int>(),
+  nb::class_<PyXCLBin>(m, "XCLBin")
+      .def(nb::init<const std::string &, const std::string &, int>(),
            "xclbin_path"_a, "kernel_name"_a, "device_index"_a = 0)
       .def("load_npu_instructions", &PyXCLBin::loadNPUInstructions, "insts"_a)
       .def("sync_buffers_to_device", &PyXCLBin::syncBuffersToDevice)
       .def("sync_buffers_from_device", &PyXCLBin::syncBuffersFromDevice)
       .def("run", &PyXCLBin::run)
       .def("_run_only_npu_instructions", &PyXCLBin::_runOnlyNpuInstructions)
-      .def("wait", &PyXCLBin::wait, "timeout"_a = py::none())
+      .def("wait", &PyXCLBin::wait, "timeout"_a = nb::none())
       .def(
           "mmap_buffers",
-          [](PyXCLBin &self, const std::vector<std::vector<int>> &shapes,
-             const py::object &npFormat) {
-            auto npy = py::module_::import("numpy");
+          [](PyXCLBin &self, const std::vector<std::vector<size_t>> &shapes,
+             const nb::object &npFormat) {
+            auto npy = nb::module_::import_("numpy");
             if (npFormat.is(npy.attr("int16")))
               return self.mmapBuffers<int16_t>(shapes);
             if (npFormat.is(npy.attr("int32")))
@@ -167,7 +170,7 @@ PYBIND11_MODULE(_xrt, m) {
             if (npFormat.is(npy.attr("float64")))
               return self.mmapBuffers<double>(shapes);
             throw std::runtime_error("unsupported np format: " +
-                                     py::repr(npFormat).cast<std::string>());
+                                     nb::cast<std::string>(nb::repr(npFormat)));
           },
           "shapes"_a, "np_format"_a)
       .def("_get_buffer_host_address", [](PyXCLBin &self, size_t idx) {