From faf7b3d1bb34854ad05e6da0a2f658276490b316 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 24 Apr 2024 17:49:40 +0200
Subject: [PATCH 1/3] Document the two HUGETLB options for buffer allocation

---
 Makefile.rule | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Makefile.rule b/Makefile.rule
index bb239be2a4..d25299775d 100644
--- a/Makefile.rule
+++ b/Makefile.rule
@@ -219,6 +219,16 @@ NO_AFFINITY = 1
 # to the user space. If bigphysarea is enabled, it will use it.
 # DEVICEDRIVER_ALLOCATION = 1
 
+# Use large page allocation (called hugepage support in Linux context)
+# for the thread buffers (with access by shared memory operations)
+# HUGETLB_ALLOCATION = 1
+
+# Use large page allocation (called hugepages in Linux) based on mmap accessing
+# a memory-backed pseudofile. This requires hugetlbfs to be mounted in the system;
+# the example below has it mounted on /hugepages. OpenBLAS will create the backing
+# file as gotoblas.processid in that path.
+# HUGETLBFILE_ALLOCATION = /hugepages
+
 # If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
 # CONSISTENT_FPCSR = 1
 

From 9c4e10fbd155daa0ee8206a8313436f14f7f7282 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 4 May 2024 14:48:02 +0200
Subject: [PATCH 2/3] sort hugetlb and shm alloc options

---
 Makefile.system | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/Makefile.system b/Makefile.system
index 840389db73..67830f2a3e 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -355,7 +355,10 @@ OBJCONV = $(CROSS_SUFFIX)objconv
 # When fortran support was either not detected or actively deselected, only build BLAS.
 ifeq ($(NOFORTRAN), 1)
 C_LAPACK = 1
-override FEXTRALIB = 
+override FEXTRALIB =
+ifeq ($(C_COMPILER), GCC)
+CCOMMON_OPT += -Wno-error=incompatible-pointer-types
+endif
 endif
 
 ifeq ($(C_COMPILER), GCC)
@@ -959,18 +962,12 @@ endif
 
 ifeq ($(ARCH), loongarch64)
 LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d)
-LA64_ARCH=$(shell $(CC) -march=loongarch64 -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo loongarch64)
 ifneq ($(LA64_ABI), lp64d)
 LA64_ABI=lp64
 endif
-ifneq ($(LA64_ARCH), loongarch64)
-CCOMMON_OPT += -mabi=$(LA64_ABI)
-FCOMMON_OPT += -mabi=$(LA64_ABI)
-else
 CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
 FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
 endif
-endif
 
 endif
 
@@ -1589,13 +1586,23 @@ ifdef FUNCTION_PROFILE
 CCOMMON_OPT	+= -DFUNCTION_PROFILE
 endif
 
+ifdef SHMEM_ALLOCATION
+ifneq ($(SHMEM_ALLOCATION), 0)
+CCOMMON_OPT	+= -DALLOC_SHM
+endif
+endif
+
 ifdef HUGETLB_ALLOCATION
+ifneq ($(HUGETLB_ALLOCATION), 0)
 CCOMMON_OPT	+= -DALLOC_HUGETLB
 endif
+endif
 
 ifdef HUGETLBFILE_ALLOCATION
+ifneq ($(HUGETLBFILE_ALLOCATION), 0)
 CCOMMON_OPT	+= -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION)
 endif
+endif
 
 ifdef STATIC_ALLOCATION
 CCOMMON_OPT	+= -DALLOC_STATIC

From dc99b613805fd15f1e71836b0f3e37c17770d697 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Sat, 4 May 2024 14:49:00 +0200
Subject: [PATCH 3/3] sort unwanted interdependencies of alloc_shm and
 alloc_hugetlb

---
 driver/others/memory.c | 58 ++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/driver/others/memory.c b/driver/others/memory.c
index c55688aced..94b2f1a6ad 100644
--- a/driver/others/memory.c
+++ b/driver/others/memory.c
@@ -1165,11 +1165,10 @@ void *blas_memory_alloc(int procpos){
 #ifdef ALLOC_DEVICEDRIVER
     alloc_devicedirver,
 #endif
-/* Hugetlb implicitly assumes ALLOC_SHM */
-#ifdef ALLOC_SHM
+#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB) /* must match the guard around the alloc_shm definition */
     alloc_shm,
 #endif
-#if ((defined ALLOC_SHM) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS))
+#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS))
     alloc_hugetlb,
 #endif
 #ifdef ALLOC_MMAP
@@ -1190,7 +1189,6 @@ void *blas_memory_alloc(int procpos){
   struct alloc_t * alloc_info;
   struct alloc_t ** alloc_table;
 
-
 #if defined(SMP) && !defined(USE_OPENMP)
 int mi;
 LOCK_COMMAND(&alloc_lock);
@@ -1282,7 +1280,7 @@ UNLOCK_COMMAND(&alloc_lock);
         }
 #endif
 
-#if (defined ALLOC_SHM) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
+#if (defined ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
         if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
 #endif
 
@@ -2494,7 +2492,7 @@ static void *alloc_devicedirver(void *address){
 
 #endif
 
-#ifdef ALLOC_SHM
+#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB)
 
 static void alloc_shm_free(struct release_t *release){
 
@@ -2506,7 +2504,9 @@ static void alloc_shm_free(struct release_t *release){
 static void *alloc_shm(void *address){
   void *map_address;
   int shmid;
-
+#ifdef DEBUG
+ fprintf(stderr,"alloc_shm got called\n");
+#endif
   shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);
 
   map_address = (void *)shmat(shmid, address, 0);
@@ -2533,6 +2533,7 @@ static void *alloc_shm(void *address){
 
   return map_address;
 }
+#endif
 
 #if defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS
 
@@ -2562,6 +2563,10 @@ static void *alloc_hugetlb(void *address){
 
   void *map_address = (void *)-1;
 
+#ifdef DEBUG
+fprintf(stderr,"alloc_hugetlb got called\n");
+#endif
+
 #if defined(OS_LINUX) || defined(OS_AIX)
   int shmid;
 
@@ -2583,7 +2588,7 @@ static void *alloc_hugetlb(void *address){
 
     if (map_address != (void *)-1){
       shmctl(shmid, IPC_RMID, 0);
-    }
+    }else fprintf(stderr, "alloc_hugetlb failed\n"); /* diagnostics go to stderr, never the caller's stdout */
   }
 #endif
 
@@ -2645,7 +2650,6 @@ static void *alloc_hugetlb(void *address){
 }
 #endif
 
-#endif
 
 #ifdef  ALLOC_HUGETLBFILE
 
@@ -2762,11 +2766,10 @@ void *blas_memory_alloc(int procpos){
 #ifdef ALLOC_DEVICEDRIVER
     alloc_devicedirver,
 #endif
-/* Hugetlb implicitly assumes ALLOC_SHM */
-#ifdef ALLOC_SHM
+#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB) /* must match the guard around the alloc_shm definition */
     alloc_shm,
 #endif
-#if ((defined ALLOC_SHM) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS))
+#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS))
     alloc_hugetlb,
 #endif
 #ifdef ALLOC_MMAP
@@ -2945,8 +2948,22 @@ void *blas_memory_alloc(int procpos){
         }
 #endif
 
-#if (defined ALLOC_SHM) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
+#if (defined ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
         if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
+#ifdef DEBUG
+	if (hugetlb_allocated) printf("allocating via shared memory with large page support (hugetlb)\n");
+#endif
+#endif
+
+#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS) /* alloc_shm only exists without ALLOC_HUGETLB */
+#ifdef DEBUG
+	printf("allocating via shared memory\n");
+#endif
+        if ((*func == alloc_shm) && (map_address == (void *)-1)) {
+#ifndef OS_WINDOWS
+            fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
+#endif
+	}
+#endif
 
         func ++;
@@ -3061,10 +3078,23 @@ void *blas_memory_alloc(int procpos){
         }
 #endif
 
-#if (defined ALLOC_SHM) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
+#if (defined ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS)
+#ifdef DEBUG
+	fprintf(stderr,"OpenBLAS: allocating via shared memory with large page support (hugetlb)\n");
+#endif
         if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
 #endif
 
+#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB) && (defined OS_LINUX  || defined OS_AIX  || defined __sun__  || defined OS_WINDOWS) /* alloc_shm only exists without ALLOC_HUGETLB */
+#ifdef DEBUG
+	fprintf(stderr,"allocating via shared memory\n");
+#endif
+        if ((*func == alloc_shm) && (map_address == (void *)-1)) {
+#ifndef OS_WINDOWS
+            fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
+#endif
+	}
+#endif
         func ++;
       }