diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 4581d321764b..3cba499aa0cc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -570,13 +570,13 @@ bool CIRGenModule::shouldEmitCUDAGlobalVar(const VarDecl *global) const {
   // their device-side incarnations.
 
   if (global->hasAttr<CUDAConstantAttr>() ||
-      global->getType()->isCUDADeviceBuiltinSurfaceType() ||
       global->getType()->isCUDADeviceBuiltinTextureType()) {
     llvm_unreachable("NYI");
   }
 
   return !langOpts.CUDAIsDevice || global->hasAttr<CUDADeviceAttr>() ||
-         global->hasAttr<CUDASharedAttr>();
+         global->hasAttr<CUDASharedAttr>() ||
+         global->getType()->isCUDADeviceBuiltinSurfaceType();
 }
 
 void CIRGenModule::emitGlobal(GlobalDecl gd) {
@@ -1122,10 +1122,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
       }
     }
 
-    // TODO(cir): LLVM codegen makes sure the result is of the correct type
-    // by issuing a address space cast.
-    if (entryCIRAS != cirAS)
-      llvm_unreachable("NYI");
+    // Address space check removed because it is unnecessary because CIR records
+    // address space info in types.
 
     // (If global is requested for a definition, we always need to create a new
     // global, not just return a bitcast.)
@@ -1496,7 +1494,8 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *d,
     // __shared__ variables is not marked as externally initialized,
     // because they must not be initialized.
     if (linkage != cir::GlobalLinkageKind::InternalLinkage &&
-        (d->hasAttr<CUDADeviceAttr>())) {
+        (d->hasAttr<CUDADeviceAttr>() ||
+         d->getType()->isCUDADeviceBuiltinSurfaceType())) {
       gv->setAttr(CUDAExternallyInitializedAttr::getMnemonic(),
                   CUDAExternallyInitializedAttr::get(&getMLIRContext()));
     }
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 70f7b681bc86..6000474b161d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -352,10 +352,14 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
   //  1. There is no SurfaceType on HIP,
   //  2. There is Texture memory on HIP but accessing the memory goes through
   //  calls to the runtime. e.g. for a 2D: `tex2D<float>(tex, x, y);`
-  if (astContext.getLangOpts().CUDA && astContext.getLangOpts().CUDAIsDevice) {
-    if (Ty->isCUDADeviceBuiltinSurfaceType() ||
-        Ty->isCUDADeviceBuiltinTextureType())
+  if (astContext.getLangOpts().CUDAIsDevice) {
+    if (T->isCUDADeviceBuiltinSurfaceType()) {
+      if (mlir::Type Ty =
+              CGM.getTargetCIRGenInfo().getCUDADeviceBuiltinSurfaceDeviceType())
+        return Ty;
+    } else if (T->isCUDADeviceBuiltinTextureType()) {
       llvm_unreachable("NYI");
+    }
   }
 
   if (const auto *recordType = dyn_cast<RecordType>(T))
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index bbdcc1e4735f..d44542de5523 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -1,3 +1,4 @@
+
 #include "TargetInfo.h"
 #include "ABIInfo.h"
 #include "CIRGenCXXABI.h"
@@ -344,6 +345,11 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
 public:
   NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
       : TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}
+  mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override {
+    // On the device side, texture reference is represented as an object handle
+    // in 64-bit integer.
+    return cir::IntType::get(&getABIInfo().CGT.getMLIRContext(), 64, true);
+  }
 };
 
 } // namespace
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index 82adf6f10d60..84cf5d1c691f 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -120,7 +120,9 @@ class TargetCIRGenInfo {
   // kernels. They should reset the calling convention to OpenCLKernel,
   // which will be further resolved by getOpenCLKernelCallingConv().
   virtual void setCUDAKernelCallingConvention(const FunctionType *&ft) const {}
-
+  virtual mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const {
+    return nullptr;
+  }
   virtual ~TargetCIRGenInfo() {}
 };
 
diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
index eaa5ffd151b0..ef31582f65be 100644
--- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp
@@ -657,10 +657,10 @@ AddressSpaceAttr::getValueFromLangAS(clang::LangAS langAS) {
     return Kind::offload_private;
   case LangAS::opencl_generic:
     return Kind::offload_generic;
-
+  case LangAS::cuda_constant:
+    return Kind::offload_constant;
   case LangAS::opencl_global_device:
   case LangAS::opencl_global_host:
-  case LangAS::cuda_constant:
   case LangAS::sycl_global:
   case LangAS::sycl_global_device:
   case LangAS::sycl_global_host:
diff --git a/clang/test/CIR/CodeGen/CUDA/surface.cu b/clang/test/CIR/CodeGen/CUDA/surface.cu
new file mode 100644
index 000000000000..da085137f325
--- /dev/null
+++ b/clang/test/CIR/CodeGen/CUDA/surface.cu
@@ -0,0 +1,26 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE-LLVM %s
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-cir -o - %s | FileCheck --check-prefix=DEVICE-CIR %s
+// RUN: echo "GPU binary would be here" > %t
+// RUN: %clang_cc1 -fclangir -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
+
+struct surfaceReference {
+  int desc;
+};
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
+};
+
+// Partial specialization over `void`.
+template<int dim>
+struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
+};
+
+surface<void, 2> surf;
+
+// DEVICE-LLVM: @surf = addrspace(1) externally_initialized global i64 undef, align 4
+// DEVICE-CIR: cir.global external addrspace(offload_global) @surf = #cir.undef : !s64i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
+// HOST: @surf = global %"struct.surface<void, 2>" zeroinitializer, align 4
\ No newline at end of file