Skip to content

Fix and document the two HUGETLB options for buffer allocation in Makefile.rule #4662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,16 @@ NO_AFFINITY = 1
# to the user space. If bigphysarea is enabled, it will use it.
# DEVICEDRIVER_ALLOCATION = 1

# Use large page allocation (called hugepage support in Linux context)
# for the thread buffers (with access by shared memory operations)
# HUGETLB_ALLOCATION = 1

# Use large page allocation (called hugepages in Linux) based on mmap accessing
# a memory-backed pseudofile. This requires hugetlbfs to be mounted in the system;
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
# file as gotoblas.processid in that path.
# HUGETLBFILE_ALLOCATION = /hugepages

# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
# CONSISTENT_FPCSR = 1

Expand Down
16 changes: 10 additions & 6 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -962,18 +962,12 @@ endif

# LoongArch64 only: choose the -mabi / -march flags that the compiler accepts
# and append them to the common C and Fortran option lists.
ifeq ($(ARCH), loongarch64)
# Each probe compiles cpuid_loongarch64.c with the candidate flag and discards
# all output; the variable receives the flag's name only if the compile succeeds.
LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d)
LA64_ARCH=$(shell $(CC) -march=loongarch64 -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo loongarch64)
# Fall back to the lp64 ABI when the lp64d probe did not succeed.
ifneq ($(LA64_ABI), lp64d)
LA64_ABI=lp64
endif
# Pass -march=loongarch64 only when the probe accepted it; the ABI flag is
# added in both cases.
ifneq ($(LA64_ARCH), loongarch64)
CCOMMON_OPT += -mabi=$(LA64_ABI)
FCOMMON_OPT += -mabi=$(LA64_ABI)
else
CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
endif
endif

endif

Expand Down Expand Up @@ -1592,13 +1586,23 @@ ifdef FUNCTION_PROFILE
CCOMMON_OPT += -DFUNCTION_PROFILE
endif

# Add -DALLOC_SHM to CCOMMON_OPT when SHMEM_ALLOCATION is set to a non-empty
# value other than 0, so that "SHMEM_ALLOCATION = 0" leaves the option disabled.
ifdef SHMEM_ALLOCATION
ifneq ($(SHMEM_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_SHM
endif
endif

# Add -DALLOC_HUGETLB to CCOMMON_OPT when HUGETLB_ALLOCATION is set to a
# non-empty value other than 0, so that "HUGETLB_ALLOCATION = 0" leaves the
# option disabled.
ifdef HUGETLB_ALLOCATION
ifneq ($(HUGETLB_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_HUGETLB
endif
endif

# When HUGETLBFILE_ALLOCATION is set to a non-empty value other than 0, add
# -DALLOC_HUGETLBFILE to CCOMMON_OPT and forward the variable's value as the
# HUGETLB_FILE_NAME macro.
# NOTE(review): the value is passed unquoted; if the C sources use
# HUGETLB_FILE_NAME as a string literal, the definition would need embedded
# quotes -- confirm against the consumer of this macro.
ifdef HUGETLBFILE_ALLOCATION
ifneq ($(HUGETLBFILE_ALLOCATION), 0)
CCOMMON_OPT += -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION)
endif
endif

ifdef STATIC_ALLOCATION
CCOMMON_OPT += -DALLOC_STATIC
Expand Down
58 changes: 44 additions & 14 deletions driver/others/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1165,11 +1165,10 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER
alloc_devicedirver,
#endif
/* Hugetlb implicitly assumes ALLOC_SHM */
#ifdef ALLOC_SHM
#ifdef ALLOC_SHM && !defined(ALLOC_HUGETLB)
alloc_shm,
#endif
#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
alloc_hugetlb,
#endif
#ifdef ALLOC_MMAP
Expand All @@ -1190,7 +1189,6 @@ void *blas_memory_alloc(int procpos){
struct alloc_t * alloc_info;
struct alloc_t ** alloc_table;


#if defined(SMP) && !defined(USE_OPENMP)
int mi;
LOCK_COMMAND(&alloc_lock);
Expand Down Expand Up @@ -1282,7 +1280,7 @@ UNLOCK_COMMAND(&alloc_lock);
}
#endif

#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif

Expand Down Expand Up @@ -2494,7 +2492,7 @@ static void *alloc_devicedirver(void *address){

#endif

#ifdef ALLOC_SHM
#if defined(ALLOC_SHM) && !defined(ALLOC_HUGETLB)

static void alloc_shm_free(struct release_t *release){

Expand All @@ -2506,7 +2504,9 @@ static void alloc_shm_free(struct release_t *release){
static void *alloc_shm(void *address){
void *map_address;
int shmid;

#ifdef DEBUG
fprintf(stderr,"alloc_shm got called\n");
#endif
shmid = shmget(IPC_PRIVATE, BUFFER_SIZE,IPC_CREAT | 0600);

map_address = (void *)shmat(shmid, address, 0);
Expand All @@ -2533,6 +2533,7 @@ static void *alloc_shm(void *address){

return map_address;
}
#endif

#if defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS

Expand Down Expand Up @@ -2562,6 +2563,10 @@ static void *alloc_hugetlb(void *address){

void *map_address = (void *)-1;

#ifdef DEBUG
fprintf(stderr,"alloc_hugetlb got called\n");
#endif

#if defined(OS_LINUX) || defined(OS_AIX)
int shmid;

Expand All @@ -2583,7 +2588,7 @@ static void *alloc_hugetlb(void *address){

if (map_address != (void *)-1){
shmctl(shmid, IPC_RMID, 0);
}
}else printf("alloc_hugetlb failed\n");
}
#endif

Expand Down Expand Up @@ -2645,7 +2650,6 @@ static void *alloc_hugetlb(void *address){
}
#endif

#endif

#ifdef ALLOC_HUGETLBFILE

Expand Down Expand Up @@ -2762,11 +2766,10 @@ void *blas_memory_alloc(int procpos){
#ifdef ALLOC_DEVICEDRIVER
alloc_devicedirver,
#endif
/* Hugetlb implicitly assumes ALLOC_SHM */
#ifdef ALLOC_SHM
#ifdef ALLOC_SHM && !defined(ALLOC_HUGETLB)
alloc_shm,
#endif
#if ((defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
#if ((defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS))
alloc_hugetlb,
#endif
#ifdef ALLOC_MMAP
Expand Down Expand Up @@ -2945,8 +2948,22 @@ void *blas_memory_alloc(int procpos){
}
#endif

#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#ifdef DEBUG
if (hugetlb_allocated) printf("allocating via shared memory with large page support (hugetlb)\n");
#endif
#endif

#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
printf("allocating via shared memory\n");
#endif
if ((*func == alloc_shm) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
#endif
}
#endif

func ++;
Expand Down Expand Up @@ -3061,10 +3078,23 @@ void *blas_memory_alloc(int procpos){
}
#endif

#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#if (defined ALLOC_HUGETLB) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
fprintf(stderr,"OpenBLAS: allocating via shared memory with large page support (hugetlb)\n");
#endif
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
#endif

#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
#ifdef DEBUG
fprintf(stderr,"allocating via shared memory\n");
#endif
if ((*func == alloc_shm) && (map_address == (void *)-1)) {
#ifndef OS_WINDOWS
fprintf(stderr, "OpenBLAS Warning ... shared memory allocation was failed.\n");
#endif
}
#endif
func ++;
}

Expand Down
Loading