From faf7b3d1bb34854ad05e6da0a2f658276490b316 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Apr 2024 17:49:40 +0200 Subject: [PATCH 1/3] Document the two HUGETLB options for buffer allocation --- Makefile.rule | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Makefile.rule b/Makefile.rule index bb239be2a4..d25299775d 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -219,6 +219,16 @@ NO_AFFINITY = 1 # to the user space. If bigphysarea is enabled, it will use it. # DEVICEDRIVER_ALLOCATION = 1 +# Use large page allocation (called hugepage support in Linux context) +# for the thread buffers (with access by shared memory operations) +# HUGETLB_ALLOCATION = 1 + +# Use large page allocation (called hugepages in Linux) based on mmap accessing +# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system, +# the example below has it mounted on /hugepages. OpenBLAS will create the backing +# file as gotoblas.processid in that path) +# HUGETLBFILE_ALLOCATION = /hugepages + # If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only). # CONSISTENT_FPCSR = 1 From 9c4e10fbd155daa0ee8206a8313436f14f7f7282 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 4 May 2024 14:48:02 +0200 Subject: [PATCH 2/3] sort hugetlb and shm alloc options --- Makefile.system | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Makefile.system b/Makefile.system index 840389db73..67830f2a3e 100644 --- a/Makefile.system +++ b/Makefile.system @@ -355,7 +355,10 @@ OBJCONV = $(CROSS_SUFFIX)objconv # When fortran support was either not detected or actively deselected, only build BLAS. 
ifeq ($(NOFORTRAN), 1) C_LAPACK = 1 -override FEXTRALIB = +override FEXTRALIB = +ifeq ($(C_COMPILER), GCC) +CCOMMON_OPT += -Wno-error=incompatible-pointer-types +endif endif ifeq ($(C_COMPILER), GCC) @@ -959,18 +962,12 @@ endif ifeq ($(ARCH), loongarch64) LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d) -LA64_ARCH=$(shell $(CC) -march=loongarch64 -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo loongarch64) ifneq ($(LA64_ABI), lp64d) LA64_ABI=lp64 endif -ifneq ($(LA64_ARCH), loongarch64) -CCOMMON_OPT += -mabi=$(LA64_ABI) -FCOMMON_OPT += -mabi=$(LA64_ABI) -else CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI) endif -endif endif @@ -1589,13 +1586,23 @@ ifdef FUNCTION_PROFILE CCOMMON_OPT += -DFUNCTION_PROFILE endif +ifdef SHMEM_ALLOCATION +ifneq ($(SHMEM_ALLOCATION), 0) +CCOMMON_OPT += -DALLOC_SHM +endif +endif + ifdef HUGETLB_ALLOCATION +ifneq ($(HUGETLB_ALLOCATION), 0) CCOMMON_OPT += -DALLOC_HUGETLB endif +endif ifdef HUGETLBFILE_ALLOCATION +ifneq ($(HUGETLBFILE_ALLOCATION), 0) CCOMMON_OPT += -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION) endif +endif ifdef STATIC_ALLOCATION CCOMMON_OPT += -DALLOC_STATIC From dc99b613805fd15f1e71836b0f3e37c17770d697 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 4 May 2024 14:49:00 +0200 Subject: [PATCH 3/3] sort unwanted interdependencies of alloc_shm and alloc_hugetlb --- driver/others/memory.c | 58 ++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index c55688aced..94b2f1a6ad 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1165,11 +1165,10 @@ void *blas_memory_alloc(int procpos){ #ifdef ALLOC_DEVICEDRIVER alloc_devicedirver, #endif -/* Hugetlb implicitly assumes ALLOC_SHM */ -#ifdef ALLOC_SHM +#ifdef ALLOC_SHM && 
!defined(ALLOC_HUGETLB) alloc_shm, #endif -#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) +#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) alloc_hugetlb, #endif #ifdef ALLOC_MMAP @@ -1190,7 +1189,6 @@ void *blas_memory_alloc(int procpos){ struct alloc_t * alloc_info; struct alloc_t ** alloc_table; - #if defined(SMP) && !defined(USE_OPENMP) int mi; LOCK_COMMAND(&alloc_lock); @@ -1282,7 +1280,7 @@ UNLOCK_COMMAND(&alloc_lock); } #endif -#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif @@ -2494,7 +2492,7 @@ static void *alloc_devicedirver(void *address){ #endif -#ifdef ALLOC_SHM +#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB) static void alloc_shm_free(struct release_t *release){ @@ -2506,7 +2504,9 @@ static void alloc_shm_free(struct release_t *release){ static void *alloc_shm(void *address){ void *map_address; int shmid; - +#ifdef DEBUG + fprintf(stderr,"alloc_shm got called\n"); +#endif shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600); map_address = (void *)shmat(shmid, address, 0); @@ -2533,6 +2533,7 @@ static void *alloc_shm(void *address){ return map_address; } +#endif #if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS @@ -2562,6 +2563,10 @@ static void *alloc_hugetlb(void *address){ void *map_address = (void *)-1; +#ifdef DEBUG +fprintf(stderr,"alloc_hugetlb got called\n"); +#endif + #if defined(OS_LINUX) || defined(OS_AIX) int shmid; @@ -2583,7 +2588,7 @@ static void *alloc_hugetlb(void *address){ if (map_address != (void *)-1){ shmctl(shmid, IPC_RMID, 0); - } + }else printf("alloc_hugetlb failed\n"); } #endif @@ -2645,7 +2650,6 @@ static void 
*alloc_hugetlb(void *address){ } #endif -#endif #ifdef ALLOC_HUGETLBFILE @@ -2762,11 +2766,10 @@ void *blas_memory_alloc(int procpos){ #ifdef ALLOC_DEVICEDRIVER alloc_devicedirver, #endif -/* Hugetlb implicitly assumes ALLOC_SHM */ -#ifdef ALLOC_SHM +#ifdef ALLOC_SHM && !defined(ALLOC_HUGETLB) alloc_shm, #endif -#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) +#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)) alloc_hugetlb, #endif #ifdef ALLOC_MMAP @@ -2945,8 +2948,22 @@ void *blas_memory_alloc(int procpos){ } #endif -#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; +#ifdef DEBUG + if (hugetlb_allocated) printf("allocating via shared memory with large page support (hugetlb)\n"); +#endif +#endif + +#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#ifdef DEBUG + printf("allocating via shared memory\n"); +#endif + if ((*func == alloc_shm) && (map_address == (void *)-1)) { +#ifndef OS_WINDOWS + fprintf(stderr, "OpenBLAS Warning ... 
shared memory allocation was failed.\n"); +#endif + } #endif func ++; @@ -3061,10 +3078,23 @@ void *blas_memory_alloc(int procpos){ } #endif -#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#ifdef DEBUG + fprintf(stderr,"OpenBLAS: allocating via shared memory with large page support (hugetlb)\n"); +#endif if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1; #endif +#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS) +#ifdef DEBUG + fprintf(stderr,"allocating via shared memory\n"); +#endif + if ((*func == alloc_shm) && (map_address == (void *)-1)) { +#ifndef OS_WINDOWS + fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n"); +#endif + } +#endif func ++; }