--- llama.cpp/Makefile	2023-11-18 10:44:06.631639892 +0000
+++ Makefile	2023-11-18 10:43:50.795566826 +0000
@@ -390,19 +390,12 @@
 	MK_CPPFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	MK_LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	OBJS         += ggml-cuda.o
-	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
+	NVCCFLAGS := --compiler-options="$(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS)" -use_fast_math
 ifdef LLAMA_CUDA_NVCC
 	NVCC = $(LLAMA_CUDA_NVCC)
 else
 	NVCC = nvcc
 endif #LLAMA_CUDA_NVCC
-ifdef CUDA_DOCKER_ARCH
-	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
-else ifdef CUDA_POWER_ARCH
-	NVCCFLAGS +=
-else
-	NVCCFLAGS += -arch=native
-endif # CUDA_DOCKER_ARCH
 ifdef LLAMA_CUDA_FORCE_DMMV
 	NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
 endif # LLAMA_CUDA_FORCE_DMMV
@@ -514,7 +507,7 @@
 override LDFLAGS       := $(MK_LDFLAGS) $(LDFLAGS)
 
 # save CXXFLAGS before we add host-only options
-NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
+#NVCCFLAGS := $(NVCCFLAGS) $(CXXFLAGS) $(CUDA_CXXFLAGS) -Wno-pedantic -Xcompiler "$(HOST_CXXFLAGS)"
 override CXXFLAGS += $(HOST_CXXFLAGS)
 
 #