diff --git a/cuda_bindings/cuda/bindings/_internal/__init__.py b/cuda_bindings/cuda/bindings/_internal/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd new file mode 100644 index 000000000..5f717d4d8 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. + +from ..cynvjitlink cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult _nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil +cdef nvJitLinkResult _nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil +cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil +cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil +cdef nvJitLinkResult _nvJitLinkComplete(nvJitLinkHandle handle) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil +cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult 
# POSIX dynamic-loading API, used below to locate the CUDA driver and
# libnvJitLink at run time instead of linking against them at build time.
# The header name must be given explicitly: an empty string would make Cython
# emit `#include ""`, which does not compile.
cdef extern from "<dlfcn.h>" nogil:
    void* dlopen(const char*, int)
    char* dlerror()
    void* dlsym(void*, const char*)
    int dlclose(void*)

    enum:
        RTLD_LAZY
        RTLD_NOW
        RTLD_GLOBAL
        RTLD_LOCAL

    # Pseudo-handle accepted by dlsym(); declared with a C name so Cython
    # uses the macro from the header verbatim.
    const void* RTLD_DEFAULT 'RTLD_DEFAULT'
cdef void* load_library(const int driver_ver) except* with gil:
    """dlopen() libnvJitLink and return its handle.

    The candidate file names are ``libnvJitLink.so.<suffix>`` for every suffix
    yielded by ``get_nvjitlink_dso_version_suffix(driver_ver)``; an empty
    suffix selects the unversioned ``libnvJitLink.so``. The first name that
    loads wins.

    Raises:
        RuntimeError: if none of the candidates could be loaded.
    """
    # Initialise explicitly so the variable never holds an indeterminate value.
    cdef void* handle = NULL
    cdef char* raw_err = NULL

    for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
        so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix)
        handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
        if handle != NULL:
            return handle

    # Every candidate failed (or there was no candidate at all). dlerror()
    # returns NULL when no error is pending, so it must be checked before it
    # is converted to a Python string.
    raw_err = dlerror()
    if raw_err != NULL:
        reason = (<bytes>raw_err).decode()
    else:
        reason = "no error reported by dlerror"
    raise RuntimeError(f'Failed to dlopen libnvJitLink ({reason})')
'nvJitLinkDestroy') + + global __nvJitLinkAddData + __nvJitLinkAddData = dlsym(RTLD_DEFAULT, 'nvJitLinkAddData') + if __nvJitLinkAddData == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddData = dlsym(handle, 'nvJitLinkAddData') + + global __nvJitLinkAddFile + __nvJitLinkAddFile = dlsym(RTLD_DEFAULT, 'nvJitLinkAddFile') + if __nvJitLinkAddFile == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkAddFile = dlsym(handle, 'nvJitLinkAddFile') + + global __nvJitLinkComplete + __nvJitLinkComplete = dlsym(RTLD_DEFAULT, 'nvJitLinkComplete') + if __nvJitLinkComplete == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkComplete = dlsym(handle, 'nvJitLinkComplete') + + global __nvJitLinkGetLinkedCubinSize + __nvJitLinkGetLinkedCubinSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubinSize') + if __nvJitLinkGetLinkedCubinSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubinSize = dlsym(handle, 'nvJitLinkGetLinkedCubinSize') + + global __nvJitLinkGetLinkedCubin + __nvJitLinkGetLinkedCubin = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedCubin') + if __nvJitLinkGetLinkedCubin == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedCubin = dlsym(handle, 'nvJitLinkGetLinkedCubin') + + global __nvJitLinkGetLinkedPtxSize + __nvJitLinkGetLinkedPtxSize = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtxSize') + if __nvJitLinkGetLinkedPtxSize == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtxSize = dlsym(handle, 'nvJitLinkGetLinkedPtxSize') + + global __nvJitLinkGetLinkedPtx + __nvJitLinkGetLinkedPtx = dlsym(RTLD_DEFAULT, 'nvJitLinkGetLinkedPtx') + if __nvJitLinkGetLinkedPtx == NULL: + if handle == NULL: + handle = load_library(driver_ver) + __nvJitLinkGetLinkedPtx = dlsym(handle, 'nvJitLinkGetLinkedPtx') + + global __nvJitLinkGetErrorLogSize + __nvJitLinkGetErrorLogSize = dlsym(RTLD_DEFAULT, 
cpdef dict _inspect_function_pointers():
    """Return a dict mapping each ``__nvJitLink*`` slot name to its address.

    The library is initialised on first use. The resulting table is cached in
    the module-level ``func_ptrs`` and the same dict object is returned on
    every later call. A symbol that could not be resolved is reported as 0.
    """
    global func_ptrs
    if func_ptrs is not None:
        return func_ptrs

    _check_or_init_nvjitlink()

    # A raw `void*` cannot be stored in a Python container; go through
    # intptr_t so that each entry becomes a plain Python int.
    cdef dict data = {
        "__nvJitLinkCreate": <intptr_t>__nvJitLinkCreate,
        "__nvJitLinkDestroy": <intptr_t>__nvJitLinkDestroy,
        "__nvJitLinkAddData": <intptr_t>__nvJitLinkAddData,
        "__nvJitLinkAddFile": <intptr_t>__nvJitLinkAddFile,
        "__nvJitLinkComplete": <intptr_t>__nvJitLinkComplete,
        "__nvJitLinkGetLinkedCubinSize": <intptr_t>__nvJitLinkGetLinkedCubinSize,
        "__nvJitLinkGetLinkedCubin": <intptr_t>__nvJitLinkGetLinkedCubin,
        "__nvJitLinkGetLinkedPtxSize": <intptr_t>__nvJitLinkGetLinkedPtxSize,
        "__nvJitLinkGetLinkedPtx": <intptr_t>__nvJitLinkGetLinkedPtx,
        "__nvJitLinkGetErrorLogSize": <intptr_t>__nvJitLinkGetErrorLogSize,
        "__nvJitLinkGetErrorLog": <intptr_t>__nvJitLinkGetErrorLog,
        "__nvJitLinkGetInfoLogSize": <intptr_t>__nvJitLinkGetInfoLogSize,
        "__nvJitLinkGetInfoLog": <intptr_t>__nvJitLinkGetInfoLog,
        "__nvJitLinkVersion": <intptr_t>__nvJitLinkVersion,
    }

    func_ptrs = data
    return data
cdef nvJitLinkResult _nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil:
    """Forward to the dynamically resolved ``nvJitLinkAddFile``.

    Initialises the library on first use and returns the callee's result
    unchanged.

    Raises:
        FunctionNotFoundError: if the symbol was not resolved during init.
    """
    global __nvJitLinkAddFile
    _check_or_init_nvjitlink()
    if __nvJitLinkAddFile == NULL:
        with gil:
            raise FunctionNotFoundError("function nvJitLinkAddFile is not found")
    # The slot is stored as an untyped `void*`; it has to be cast to the real
    # function-pointer type before it can be called.
    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const char*) nogil>__nvJitLinkAddFile)(
        handle, inputType, fileName)
cdef nvJitLinkResult _nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil:
    """Forward to the dynamically resolved ``nvJitLinkGetErrorLogSize``.

    Initialises the library on first use and returns the callee's result
    unchanged; ``size`` is passed straight through to the callee.

    Raises:
        FunctionNotFoundError: if the symbol was not resolved during init.
    """
    global __nvJitLinkGetErrorLogSize
    _check_or_init_nvjitlink()
    if __nvJitLinkGetErrorLogSize == NULL:
        with gil:
            raise FunctionNotFoundError("function nvJitLinkGetErrorLogSize is not found")
    # Cast the untyped `void*` slot to the real signature before calling it.
    return (<nvJitLinkResult (*)(nvJitLinkHandle, size_t*) nogil>__nvJitLinkGetErrorLogSize)(
        handle, size)
cdef load_library(const int driver_ver):
    """Load the nvJitLink DLL and return its module handle.

    For every non-empty suffix yielded by
    ``get_nvjitlink_dso_version_suffix(driver_ver)`` the DLL name
    ``nvJitLink_<suffix>0_0.dll`` is tried in this order:

    1. a module of that name already loaded into the process,
    2. ``<site-packages>/nvidia/nvJitLink/bin`` for each site-packages
       directory (each existing directory is also registered with
       ``os.add_dll_directory``),
    3. the default DLL search.

    The first successful attempt wins.

    Raises:
        RuntimeError: if no attempt succeeded.
    """
    handle = 0

    for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
        if len(suffix) == 0:
            continue
        dll_name = f"nvJitLink_{suffix}0_0.dll"

        # First check if the DLL has been loaded by 3rd parties
        try:
            handle = win32api.GetModuleHandle(dll_name)
        except Exception:
            # Not loaded yet; fall through to the next strategy. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        else:
            break

        # Next, check if DLLs are installed via pip. Every existing directory
        # is probed, and the search stops at the first one that loads.
        for sp in get_site_packages():
            mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin")
            if not os.path.isdir(mod_path):
                continue
            os.add_dll_directory(mod_path)
            try:
                handle = win32api.LoadLibraryEx(
                    # Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
                    os.path.join(mod_path, dll_name),
                    0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
            except Exception:
                continue
            break
        else:
            # No pip-installed copy could be loaded.
            # Finally, try default search
            try:
                handle = win32api.LoadLibrary(dll_name)
            except Exception:
                # Nothing worked for this suffix; move on to the next one.
                continue
        # Reached only when one of the strategies above succeeded.
        break
    else:
        raise RuntimeError('Failed to load nvJitLink')

    # Explicit check instead of `assert`, which can be compiled out.
    if handle == 0:
        raise RuntimeError('Failed to load nvJitLink')
    return handle
win32api.GetProcAddress(handle, 'nvJitLinkAddData') + except: + pass + + global __nvJitLinkAddFile + try: + __nvJitLinkAddFile = win32api.GetProcAddress(handle, 'nvJitLinkAddFile') + except: + pass + + global __nvJitLinkComplete + try: + __nvJitLinkComplete = win32api.GetProcAddress(handle, 'nvJitLinkComplete') + except: + pass + + global __nvJitLinkGetLinkedCubinSize + try: + __nvJitLinkGetLinkedCubinSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubinSize') + except: + pass + + global __nvJitLinkGetLinkedCubin + try: + __nvJitLinkGetLinkedCubin = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedCubin') + except: + pass + + global __nvJitLinkGetLinkedPtxSize + try: + __nvJitLinkGetLinkedPtxSize = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtxSize') + except: + pass + + global __nvJitLinkGetLinkedPtx + try: + __nvJitLinkGetLinkedPtx = win32api.GetProcAddress(handle, 'nvJitLinkGetLinkedPtx') + except: + pass + + global __nvJitLinkGetErrorLogSize + try: + __nvJitLinkGetErrorLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLogSize') + except: + pass + + global __nvJitLinkGetErrorLog + try: + __nvJitLinkGetErrorLog = win32api.GetProcAddress(handle, 'nvJitLinkGetErrorLog') + except: + pass + + global __nvJitLinkGetInfoLogSize + try: + __nvJitLinkGetInfoLogSize = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLogSize') + except: + pass + + global __nvJitLinkGetInfoLog + try: + __nvJitLinkGetInfoLog = win32api.GetProcAddress(handle, 'nvJitLinkGetInfoLog') + except: + pass + + global __nvJitLinkVersion + try: + __nvJitLinkVersion = win32api.GetProcAddress(handle, 'nvJitLinkVersion') + except: + pass + + __py_nvjitlink_init = True + return 0 + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_nvjitlink() + cdef dict data = {} + + global __nvJitLinkCreate + data["__nvJitLinkCreate"] = __nvJitLinkCreate + + global __nvJitLinkDestroy 
cpdef _inspect_function_pointer(str name):
    """Return the table entry stored under ``name`` (e.g. ``"__nvJitLinkCreate"``).

    The table is built through ``_inspect_function_pointers()`` the first time
    it is needed and then reused from the module-level ``func_ptrs``. An
    unknown ``name`` raises ``KeyError`` from the dict lookup.
    """
    global func_ptrs
    table = func_ptrs if func_ptrs is not None else _inspect_function_pointers()
    func_ptrs = table
    return table[name]
cdef nvJitLinkResult _nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil:
    """Forward to the dynamically resolved ``nvJitLinkAddData``.

    Initialises the library on first use and returns the callee's result
    unchanged; all arguments are passed straight through.

    Raises:
        FunctionNotFoundError: if the symbol was not resolved during init.
    """
    global __nvJitLinkAddData
    _check_or_init_nvjitlink()
    if __nvJitLinkAddData == NULL:
        with gil:
            raise FunctionNotFoundError("function nvJitLinkAddData is not found")
    # Cast the untyped `void*` slot to the real signature before calling it.
    return (<nvJitLinkResult (*)(nvJitLinkHandle, nvJitLinkInputType, const void*, size_t, const char*) nogil>__nvJitLinkAddData)(
        handle, inputType, data, size, name)
cdef nvJitLinkResult _nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil:
    """Forward to the dynamically resolved ``nvJitLinkGetLinkedPtx``.

    Initialises the library on first use and returns the callee's result
    unchanged; ``ptx`` is passed straight through to the callee.

    Raises:
        FunctionNotFoundError: if the symbol was not resolved during init.
    """
    global __nvJitLinkGetLinkedPtx
    _check_or_init_nvjitlink()
    if __nvJitLinkGetLinkedPtx == NULL:
        with gil:
            raise FunctionNotFoundError("function nvJitLinkGetLinkedPtx is not found")
    # Cast the untyped `void*` slot to the real signature before calling it.
    return (<nvJitLinkResult (*)(nvJitLinkHandle, char*) nogil>__nvJitLinkGetLinkedPtx)(
        handle, ptx)
# Pointer wrapper that either owns its pointee (held by a std::unique_ptr) or
# merely borrows a raw pointer, selected by the `own_data` flag. The C++
# definition is injected verbatim; the cppclass declaration below exposes it
# to Cython.
cdef extern from * nogil:
    """
    template<typename T>
    class nullable_unique_ptr {
      public:
        nullable_unique_ptr() noexcept = default;

        nullable_unique_ptr(std::nullptr_t) noexcept = delete;

        explicit nullable_unique_ptr(T* data, bool own_data):
            own_data_(own_data)
        {
            if (own_data)
                manager_.reset(data);
            else
                raw_data_ = data;
        }

        nullable_unique_ptr(const nullable_unique_ptr&) = delete;

        nullable_unique_ptr& operator=(const nullable_unique_ptr&) = delete;

        nullable_unique_ptr(nullable_unique_ptr&& other) noexcept
        {
            own_data_ = other.own_data_;
            other.own_data_ = false;  // ownership is transferred
            if (own_data_)
            {
                manager_ = std::move(other.manager_);
                raw_data_ = nullptr;  // just in case
            }
            else
            {
                manager_.reset(nullptr);  // just in case
                raw_data_ = other.raw_data_;
            }
        }

        nullable_unique_ptr& operator=(nullable_unique_ptr&& other) noexcept
        {
            own_data_ = other.own_data_;
            other.own_data_ = false;  // ownership is transferred
            if (own_data_)
            {
                manager_ = std::move(other.manager_);
                raw_data_ = nullptr;  // just in case
            }
            else
            {
                manager_.reset(nullptr);  // just in case
                raw_data_ = other.raw_data_;
            }
            return *this;
        }

        ~nullable_unique_ptr() = default;

        void reset(T* data, bool own_data)
        {
            own_data_ = own_data;
            if (own_data_)
            {
                manager_.reset(data);
                raw_data_ = nullptr;
            }
            else
            {
                manager_.reset(nullptr);
                raw_data_ = data;
            }
        }

        void swap(nullable_unique_ptr& other) noexcept
        {
            std::swap(manager_, other.manager_);
            std::swap(raw_data_, other.raw_data_);
            std::swap(own_data_, other.own_data_);
        }

        /*
         * Get the pointer to the underlying object (this is different from data()!).
         */
        T* get() const noexcept
        {
            if (own_data_)
                return manager_.get();
            else
                return raw_data_;
        }

        /*
         * Get the pointer to the underlying buffer (this is different from get()!).
         */
        void* data() noexcept
        {
            if (own_data_)
                return manager_.get()->data();
            else
                return raw_data_;
        }

        T& operator*()
        {
            if (own_data_)
                return *manager_;
            else
                return *raw_data_;
        }

      private:
        std::unique_ptr<T> manager_{};
        T* raw_data_{nullptr};
        bool own_data_{false};
    };
    """
    # xref: cython/Cython/Includes/libcpp/memory.pxd
    cdef cppclass nullable_unique_ptr[T]:
        nullable_unique_ptr()
        nullable_unique_ptr(T*, cppbool)
        nullable_unique_ptr(nullable_unique_ptr[T]&)

        # Modifiers
        void reset(T*, cppbool)
        void swap(nullable_unique_ptr&)

        # Observers
        T* get()
        T& operator*()
        void* data()
cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=True) except*:
    """Return the raw address behind ``buf``.

    The caller must ensure ``buf`` is alive when the returned pointer is in use.

    Args:
        buf: either a Python int, interpreted as the address itself, or an
            object exporting a contiguous 1-D buffer.
        size: expected buffer length in bytes, or -1 to skip the length check.
            Ignored when ``buf`` is an int.
        readonly: when false, a writable buffer is requested.

    Raises:
        ValueError: if ``buf`` is not an int and does not export a buffer
            satisfying the constraints above.
    """
    cdef void* bufPtr = NULL
    cdef int flags = cpython.PyBUF_ANY_CONTIGUOUS
    cdef int status = -1
    cdef cpython.Py_buffer view

    if not readonly:
        flags |= cpython.PyBUF_WRITABLE

    if isinstance(buf, int):
        # A Python int cannot be assigned to a `void*` directly; convert it
        # through intptr_t.
        bufPtr = <void*><intptr_t>buf
    else:  # try buffer protocol
        try:
            status = cpython.PyObject_GetBuffer(buf, &view, flags)
            # when the caller does not provide a size, it is set to -1 at generate-time by cybind
            # Explicit checks instead of `assert`, which can be compiled out
            # and would then silently accept a wrongly sized buffer.
            if size != -1 and view.len != size:
                raise ValueError(f"expected {size} bytes, got {view.len}")
            if view.ndim != 1:
                raise ValueError(f"expected a 1D buffer, got ndim={view.ndim}")
        except Exception as e:
            adj = "writable " if not readonly else ""
            raise ValueError(
                "buf must be either a Python int representing the pointer "
                f"address to a valid buffer, or a 1D contiguous {adj}"
                "buffer, of size bytes") from e
        else:
            bufPtr = view.buf
        finally:
            # The view is released before returning, so only the exporting
            # object keeps the memory alive (see the note in the docstring).
            if status == 0:
                cpython.PyBuffer_Release(&view)

    return bufPtr
# including null termination + deref(nested_res_vec)[i].resize(str_len) + obj_i_ptr = (obj_i_bytes) + # cast to size_t explicitly to work around a potential Cython bug + deref(nested_res_vec)[i].assign(obj_i_ptr, obj_i_ptr + str_len) + else: + deref(nested_res_vec)[i] = obj_i + deref(nested_vec)[i] = (deref(nested_res_vec)[i].data()) + elif cpython.PySequence_Check(obj): + length = len(obj) + nested_vec = new vector[intptr_t](length) + nested_ptr.reset(nested_vec, True) + for i, addr in enumerate(obj): + deref(nested_vec)[i] = addr + nested_res_ptr.reset(NULL, False) + else: + # obj is an int (ResT**) + nested_res_ptr.reset(NULL, False) + nested_ptr.reset(obj, False) + + in_out_ptr.ptrs = move(nested_ptr) + in_out_ptr.nested_resource_ptr = move(nested_res_ptr) + return 0 + + +class FunctionNotFoundError(RuntimeError): pass + +class NotSupportedError(RuntimeError): pass + + +cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver): + if 12000 <= driver_ver < 13000: + return ('12', '') + else: + raise NotSupportedError('only CUDA 12 driver is supported') diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd new file mode 100644 index 000000000..3c22d939e --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd @@ -0,0 +1,60 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. + +from libc.stdint cimport intptr_t, uint32_t + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# enums +ctypedef enum nvJitLinkResult "nvJitLinkResult": + NVJITLINK_SUCCESS "NVJITLINK_SUCCESS" = 0 + NVJITLINK_ERROR_UNRECOGNIZED_OPTION "NVJITLINK_ERROR_UNRECOGNIZED_OPTION" + NVJITLINK_ERROR_MISSING_ARCH "NVJITLINK_ERROR_MISSING_ARCH" + NVJITLINK_ERROR_INVALID_INPUT "NVJITLINK_ERROR_INVALID_INPUT" + NVJITLINK_ERROR_PTX_COMPILE "NVJITLINK_ERROR_PTX_COMPILE" + NVJITLINK_ERROR_NVVM_COMPILE "NVJITLINK_ERROR_NVVM_COMPILE" + NVJITLINK_ERROR_INTERNAL "NVJITLINK_ERROR_INTERNAL" + NVJITLINK_ERROR_THREADPOOL "NVJITLINK_ERROR_THREADPOOL" + NVJITLINK_ERROR_UNRECOGNIZED_INPUT "NVJITLINK_ERROR_UNRECOGNIZED_INPUT" + NVJITLINK_ERROR_FINALIZE "NVJITLINK_ERROR_FINALIZE" + +ctypedef enum nvJitLinkInputType "nvJitLinkInputType": + NVJITLINK_INPUT_NONE "NVJITLINK_INPUT_NONE" = 0 + NVJITLINK_INPUT_CUBIN "NVJITLINK_INPUT_CUBIN" = 1 + NVJITLINK_INPUT_PTX "NVJITLINK_INPUT_PTX" + NVJITLINK_INPUT_LTOIR "NVJITLINK_INPUT_LTOIR" + NVJITLINK_INPUT_FATBIN "NVJITLINK_INPUT_FATBIN" + NVJITLINK_INPUT_OBJECT "NVJITLINK_INPUT_OBJECT" + NVJITLINK_INPUT_LIBRARY "NVJITLINK_INPUT_LIBRARY" + NVJITLINK_INPUT_INDEX "NVJITLINK_INPUT_INDEX" + NVJITLINK_INPUT_ANY "NVJITLINK_INPUT_ANY" = 10 + + +# types +ctypedef void* nvJitLinkHandle 'nvJitLinkHandle' + + +############################################################################### +# Functions +############################################################################### + +cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil +cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil +cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil +cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil +cdef nvJitLinkResult 
nvJitLinkComplete(nvJitLinkHandle handle) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil +cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil +cdef nvJitLinkResult nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx new file mode 100644 index 000000000..2a8695434 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx @@ -0,0 +1,67 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
+ +from ._internal cimport nvjitlink as _nvjitlink + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle* handle, uint32_t numOptions, const char** options) except* nogil: + return _nvjitlink._nvJitLinkCreate(handle, numOptions, options) + + +cdef nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle* handle) except* nogil: + return _nvjitlink._nvJitLinkDestroy(handle) + + +cdef nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void* data, size_t size, const char* name) except* nogil: + return _nvjitlink._nvJitLinkAddData(handle, inputType, data, size, name) + + +cdef nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char* fileName) except* nogil: + return _nvjitlink._nvJitLinkAddFile(handle, inputType, fileName) + + +cdef nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle) except* nogil: + return _nvjitlink._nvJitLinkComplete(handle) + + +cdef nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvjitlink._nvJitLinkGetLinkedCubinSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void* cubin) except* nogil: + return _nvjitlink._nvJitLinkGetLinkedCubin(handle, cubin) + + +cdef nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvjitlink._nvJitLinkGetLinkedPtxSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char* ptx) except* nogil: + return _nvjitlink._nvJitLinkGetLinkedPtx(handle, ptx) + + +cdef nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvjitlink._nvJitLinkGetErrorLogSize(handle, size) + + +cdef nvJitLinkResult 
nvJitLinkGetErrorLog(nvJitLinkHandle handle, char* log) except* nogil: + return _nvjitlink._nvJitLinkGetErrorLog(handle, log) + + +cdef nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t* size) except* nogil: + return _nvjitlink._nvJitLinkGetInfoLogSize(handle, size) + + +cdef nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char* log) except* nogil: + return _nvjitlink._nvJitLinkGetInfoLog(handle, log) + + +cdef nvJitLinkResult nvJitLinkVersion(unsigned int* major, unsigned int* minor) except* nogil: + return _nvjitlink._nvJitLinkVersion(major, minor) diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd new file mode 100644 index 000000000..4f701ed4d --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t, uint32_t + +from .cynvjitlink cimport * + + +############################################################################### +# Types +############################################################################### + +ctypedef nvJitLinkHandle Handle + + +############################################################################### +# Enum +############################################################################### + +ctypedef nvJitLinkResult _Result +ctypedef nvJitLinkInputType _InputType + + +############################################################################### +# Functions +############################################################################### + +cpdef intptr_t create(uint32_t num_options, options) except -1 +cpdef add_data(intptr_t handle, int input_type, data, size_t size, name) +cpdef add_file(intptr_t handle, int input_type, file_name) +cpdef complete(intptr_t handle) +cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0 +cpdef get_linked_cubin(intptr_t handle, cubin) +cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0 +cpdef get_linked_ptx(intptr_t handle, ptx) +cpdef size_t get_error_log_size(intptr_t handle) except? 0 +cpdef get_error_log(intptr_t handle, log) +cpdef size_t get_info_log_size(intptr_t handle) except? 0 +cpdef get_info_log(intptr_t handle, log) +cpdef tuple version() diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx new file mode 100644 index 000000000..01a12528e --- /dev/null +++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx @@ -0,0 +1,317 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# +# This code was automatically generated across versions from 12.0.1 to 12.6.2. Do not modify it directly. 
+ +cimport cython # NOQA + +from ._internal.utils cimport (get_resource_ptr, get_nested_resource_ptr, nested_resource, nullable_unique_ptr, + get_buffer_pointer, get_resource_ptrs) + +from enum import IntEnum as _IntEnum +from libcpp.vector cimport vector + + +############################################################################### +# Enum +############################################################################### + +class Result(_IntEnum): + """See `nvJitLinkResult`.""" + SUCCESS = NVJITLINK_SUCCESS + ERROR_UNRECOGNIZED_OPTION = NVJITLINK_ERROR_UNRECOGNIZED_OPTION + ERROR_MISSING_ARCH = NVJITLINK_ERROR_MISSING_ARCH + ERROR_INVALID_INPUT = NVJITLINK_ERROR_INVALID_INPUT + ERROR_PTX_COMPILE = NVJITLINK_ERROR_PTX_COMPILE + ERROR_NVVM_COMPILE = NVJITLINK_ERROR_NVVM_COMPILE + ERROR_INTERNAL = NVJITLINK_ERROR_INTERNAL + ERROR_THREADPOOL = NVJITLINK_ERROR_THREADPOOL + ERROR_UNRECOGNIZED_INPUT = NVJITLINK_ERROR_UNRECOGNIZED_INPUT + ERROR_FINALIZE = NVJITLINK_ERROR_FINALIZE + +class InputType(_IntEnum): + """See `nvJitLinkInputType`.""" + NONE = NVJITLINK_INPUT_NONE + CUBIN = NVJITLINK_INPUT_CUBIN + PTX = NVJITLINK_INPUT_PTX + LTOIR = NVJITLINK_INPUT_LTOIR + FATBIN = NVJITLINK_INPUT_FATBIN + OBJECT = NVJITLINK_INPUT_OBJECT + LIBRARY = NVJITLINK_INPUT_LIBRARY + INDEX = NVJITLINK_INPUT_INDEX + ANY = NVJITLINK_INPUT_ANY + + +############################################################################### +# Error handling +############################################################################### + +class nvJitLinkError(Exception): + + def __init__(self, status): + self.status = status + s = Result(status) + cdef str err = f"{s.name} ({s.value})" + super(nvJitLinkError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cdef int check_status(int status) except 1 nogil: + if status != 0: + with gil: + raise nvJitLinkError(status) + return status + + 
+############################################################################### +# Wrapper functions +############################################################################### + +cpdef destroy(intptr_t handle): + """nvJitLinkDestroy frees the memory associated with the given handle. + + Args: + handle (intptr_t): nvJitLink handle. + + .. seealso:: `nvJitLinkDestroy` + """ + cdef Handle h = handle + with nogil: + status = nvJitLinkDestroy(&h) + check_status(status) + + +cpdef intptr_t create(uint32_t num_options, options) except -1: + """nvJitLinkCreate creates an instance of nvJitLinkHandle with the given input options, and sets the output parameter ``handle``. + + Args: + num_options (uint32_t): Number of options passed. + options (object): Array of size ``num_options`` of option strings. It can be: + + - an :class:`int` as the pointer address to the nested sequence, or + - a Python sequence of :class:`int`\s, each of which is a pointer address + to a valid sequence of 'char', or + - a nested Python sequence of ``str``. + + + Returns: + intptr_t: Address of nvJitLink handle. + + .. seealso:: `nvJitLinkCreate` + """ + cdef nested_resource[ char ] _options_ + get_nested_resource_ptr[char](_options_, options, NULL) + cdef Handle handle + with nogil: + status = nvJitLinkCreate(&handle, num_options, (_options_.ptrs.data())) + check_status(status) + return handle + + +cpdef add_data(intptr_t handle, int input_type, data, size_t size, name): + """nvJitLinkAddData adds data image to the link. + + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + data (bytes): pointer to data image in memory. + size (size_t): size of the data. + name (str): name of input object. + + .. 
seealso:: `nvJitLinkAddData` + """ + cdef void* _data_ = get_buffer_pointer(data, size, readonly=True) + if not isinstance(name, str): + raise TypeError("name must be a Python str") + cdef bytes _temp_name_ = (name).encode() + cdef char* _name_ = _temp_name_ + with nogil: + status = nvJitLinkAddData(handle, <_InputType>input_type, _data_, size, _name_) + check_status(status) + + +cpdef add_file(intptr_t handle, int input_type, file_name): + """nvJitLinkAddFile reads data from file and links it in. + + Args: + handle (intptr_t): nvJitLink handle. + input_type (InputType): kind of input. + file_name (str): name of file. + + .. seealso:: `nvJitLinkAddFile` + """ + if not isinstance(file_name, str): + raise TypeError("file_name must be a Python str") + cdef bytes _temp_file_name_ = (file_name).encode() + cdef char* _file_name_ = _temp_file_name_ + with nogil: + status = nvJitLinkAddFile(handle, <_InputType>input_type, _file_name_) + check_status(status) + + +cpdef complete(intptr_t handle): + """nvJitLinkComplete does the actual link. + + Args: + handle (intptr_t): nvJitLink handle. + + .. seealso:: `nvJitLinkComplete` + """ + with nogil: + status = nvJitLinkComplete(handle) + check_status(status) + + +cpdef size_t get_linked_cubin_size(intptr_t handle) except? 0: + """nvJitLinkGetLinkedCubinSize gets the size of the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the linked cubin. + + .. seealso:: `nvJitLinkGetLinkedCubinSize` + """ + cdef size_t size + with nogil: + status = nvJitLinkGetLinkedCubinSize(handle, &size) + check_status(status) + return size + + +cpdef get_linked_cubin(intptr_t handle, cubin): + """nvJitLinkGetLinkedCubin gets the linked cubin. + + Args: + handle (intptr_t): nvJitLink handle. + cubin (bytes): The linked cubin. + + .. 
seealso:: `nvJitLinkGetLinkedCubin` + """ + cdef void* _cubin_ = get_buffer_pointer(cubin, -1, readonly=False) + with nogil: + status = nvJitLinkGetLinkedCubin(handle, _cubin_) + check_status(status) + + +cpdef size_t get_linked_ptx_size(intptr_t handle) except? 0: + """nvJitLinkGetLinkedPtxSize gets the size of the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtxSize` + """ + cdef size_t size + with nogil: + status = nvJitLinkGetLinkedPtxSize(handle, &size) + check_status(status) + return size + + +cpdef get_linked_ptx(intptr_t handle, ptx): + """nvJitLinkGetLinkedPtx gets the linked ptx. + + Args: + handle (intptr_t): nvJitLink handle. + ptx (bytes): The linked PTX. + + .. seealso:: `nvJitLinkGetLinkedPtx` + """ + cdef void* _ptx_ = get_buffer_pointer(ptx, -1, readonly=False) + with nogil: + status = nvJitLinkGetLinkedPtx(handle, _ptx_) + check_status(status) + + +cpdef size_t get_error_log_size(intptr_t handle) except? 0: + """nvJitLinkGetErrorLogSize gets the size of the error log. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the error log. + + .. seealso:: `nvJitLinkGetErrorLogSize` + """ + cdef size_t size + with nogil: + status = nvJitLinkGetErrorLogSize(handle, &size) + check_status(status) + return size + + +cpdef get_error_log(intptr_t handle, log): + """nvJitLinkGetErrorLog puts any error messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (bytes): The error log. + + .. seealso:: `nvJitLinkGetErrorLog` + """ + cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) + with nogil: + status = nvJitLinkGetErrorLog(handle, _log_) + check_status(status) + + +cpdef size_t get_info_log_size(intptr_t handle) except? 0: + """nvJitLinkGetInfoLogSize gets the size of the info log. + + Args: + handle (intptr_t): nvJitLink handle. + + Returns: + size_t: Size of the info log. + + .. 
seealso:: `nvJitLinkGetInfoLogSize` + """ + cdef size_t size + with nogil: + status = nvJitLinkGetInfoLogSize(handle, &size) + check_status(status) + return size + + +cpdef get_info_log(intptr_t handle, log): + """nvJitLinkGetInfoLog puts any info messages in the log. + + Args: + handle (intptr_t): nvJitLink handle. + log (bytes): The info log. + + .. seealso:: `nvJitLinkGetInfoLog` + """ + cdef void* _log_ = get_buffer_pointer(log, -1, readonly=False) + with nogil: + status = nvJitLinkGetInfoLog(handle, _log_) + check_status(status) + + +cpdef tuple version(): + """nvJitLinkVersion returns the current version of nvJitLink. + + Returns: + A 2-tuple containing: + + - unsigned int: The major version. + - unsigned int: The minor version. + + .. seealso:: `nvJitLinkVersion` + """ + cdef unsigned int major + cdef unsigned int minor + with nogil: + status = nvJitLinkVersion(&major, &minor) + check_status(status) + return (major, minor) diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py index fb9d7b953..ca1f82648 100644 --- a/cuda_bindings/setup.py +++ b/cuda_bindings/setup.py @@ -11,6 +11,7 @@ import platform import sys import sysconfig +import atexit from Cython import Tempita from Cython.Build import cythonize @@ -19,6 +20,8 @@ from setuptools.extension import Extension from setuptools.command.build_ext import build_ext import versioneer +import tempfile +import shutil # ---------------------------------------------------------------------- @@ -147,7 +150,9 @@ def generate_output(infile, local): os.path.join('cuda', 'bindings'), os.path.join('cuda', 'bindings', '_bindings'), os.path.join('cuda', 'bindings', '_lib'), - os.path.join('cuda', 'bindings', '_lib', 'cyruntime')] + os.path.join('cuda', 'bindings', '_lib', 'cyruntime'), + os.path.join('cuda', 'bindings', '_internal'), + ] input_files = [] for path in path_list: input_files += fetch_input_files(path) @@ -182,6 +187,7 @@ def generate_output(infile, local): # For Setup extensions = [] +new_extensions = 
[] cmdclass = {} # ---------------------------------------------------------------------- @@ -208,6 +214,41 @@ def prep_extensions(sources): return exts +# new path for the bindings from cybind +def rename_architecture_specific_files(): + architechture_specific_files_dir = 'cuda/bindings/_internal/' + if sys.platform == 'linux': + src_files = glob.glob(os.path.join(path, '*_linux.pyx')) + elif sys.platform == 'win32': + src_files = glob.glob(os.path.join(path, '*_windows.pyx')) + else: + raise RuntimeError(f'platform is unrecognized: {sys.platform}') + dst_files = [] + for src in src_files: + # Set up a temporary file; it must be under the cache directory so + # that atomic moves within the same filesystem can be guaranteed + with tempfile.NamedTemporaryFile(delete=False, dir='.') as f: + shutil.copy2(src, f.name) + f_name = f.name + dst = src.replace('_linux', '').replace('_windows', '') + # atomic move with the destination guaranteed to be overwritten + os.replace(f_name, f"./{dst}") + dst_files.append(dst) + return dst_files + + +dst_files = rename_architecture_specific_files() + + +@atexit.register +def cleanup_dst_files(): + for dst in dst_files: + try: + os.remove(dst) + except FileNotFoundError: + pass + + def do_cythonize(extensions): return cythonize( extensions, @@ -230,6 +271,9 @@ def do_cythonize(extensions): ["cuda/*.pyx"], # tests ["tests/*.pyx"], + # interal files used by generated bindings + ['cuda/bindings/_internal/nvjitlink.pyx'], + ['cuda/bindings/_internal/utils.pyx'], ] for sources in sources_list: @@ -260,9 +304,9 @@ def finalize_options(self): setup( version=versioneer.get_version(), ext_modules=do_cythonize(extensions), - packages=find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]), + packages=find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", 
"cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), package_data=dict.fromkeys( - find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]), + find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "cuda.cuda.bindings._internal", "tests"]), ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"], ), cmdclass=cmdclass, diff --git a/cuda_bindings/tests/test_nvjitlink.py b/cuda_bindings/tests/test_nvjitlink.py new file mode 100644 index 000000000..182d2bc40 --- /dev/null +++ b/cuda_bindings/tests/test_nvjitlink.py @@ -0,0 +1,126 @@ +# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + +from cuda.bindings import nvjitlink + + +ptx_kernel = """ +.version 8.5 +.target sm_90 +.address_size 64 + +.visible .entry _Z6kernelPi( + .param .u64 _Z6kernelPi_param_0 +) +{ + .reg .pred %p<2>; + .reg .b32 %r<3>; + .reg .b64 %rd<3>; + + ld.param.u64 %rd1, [_Z6kernelPi_param_0]; + cvta.to.global.u64 %rd2, %rd1; + mov.u32 %r1, %tid.x; + st.global.u32 [%rd2+0], %r1; + ret; +} +""" + +minimal_ptx_kernel = """ +.version 8.5 +.target sm_90 +.address_size 64 + +.func _MinimalKernel() +{ + ret; +} +""" + +ptx_kernel_bytes = ptx_kernel.encode('utf-8') +minimal_ptx_kernel_bytes = minimal_ptx_kernel.encode('utf-8') + +def test_unrecognized_option_error(): + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-fictitious_option"]) + + +def test_invalid_arch_error(): + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_UNRECOGNIZED_OPTION"): + nvjitlink.create(1, ["-arch=sm_XX"]) + + +def test_create_and_destroy(): + handle = nvjitlink.create(1, 
["-arch=sm_53"]) + assert handle != 0 + nvjitlink.destroy(handle) + + +def test_complete_empty(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.complete(handle) + nvjitlink.destroy(handle) + + +def test_add_data(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, minimal_ptx_kernel_bytes, len(minimal_ptx_kernel_bytes), "minimal_test_data") + nvjitlink.complete(handle) + nvjitlink.destroy(handle) + + +def test_add_file(tmp_path): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + file_path = tmp_path / "test_file.cubin" + file_path.write_bytes(ptx_kernel_bytes) + nvjitlink.add_file(handle, nvjitlink.InputType.ANY, str(file_path)) + nvjitlink.complete(handle) + nvjitlink.destroy(handle) + + +def test_get_error_log(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.complete(handle) + log_size = nvjitlink.get_error_log_size(handle) + log = bytearray(log_size) + nvjitlink.get_error_log(handle, log) + assert len(log) == log_size + nvjitlink.destroy(handle) + + +def test_get_info_log(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") + nvjitlink.complete(handle) + log_size = nvjitlink.get_info_log_size(handle) + log = bytearray(log_size) + nvjitlink.get_info_log(handle, log) + assert len(log) == log_size + nvjitlink.destroy(handle) + + +def test_get_linked_cubin(): + handle = nvjitlink.create(1, ["-arch=sm_90"]) + nvjitlink.add_data(handle, nvjitlink.InputType.ANY, ptx_kernel_bytes, len(ptx_kernel_bytes), "test_data") + nvjitlink.complete(handle) + cubin_size = nvjitlink.get_linked_cubin_size(handle) + cubin = bytearray(cubin_size) + nvjitlink.get_linked_cubin(handle, cubin) + assert len(cubin) == cubin_size + nvjitlink.destroy(handle) + + +def test_get_linked_ptx(): + # TODO improve 
this test to call get_linked_ptx without this error + handle = nvjitlink.create(2, ["-arch=sm_90", "-lto"]) + with pytest.raises(nvjitlink.nvJitLinkError, match="ERROR_NVVM_COMPILE"): + nvjitlink.complete(handle) + + +def test_package_version(): + ver = nvjitlink.version() + assert len(ver) == 2 + assert ver >= (12, 0)