Skip to content

Commit 3cdfe33

Browse files
authored
Merge pull request #3352 from martin-frbg/3321-2n
Allocate an auxiliary struct when running out of preconfigured threads
2 parents 47171e4 + cd10d1c commit 3cdfe33

File tree

1 file changed

+216
-3
lines changed

1 file changed

+216
-3
lines changed

driver/others/memory.c

Lines changed: 216 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7373

7474
#include "common.h"
7575

76+
/*
 * Branch-prediction hints.
 *
 * likely(x) / unlikely(x) evaluate x exactly once and yield its truth value
 * (0 or 1).  With GCC-compatible compilers they additionally tell the
 * optimizer which outcome is expected via __builtin_expect; elsewhere they
 * are plain truth tests.
 *
 * Each macro has its own guard, so a header that already provides only one
 * of the two does not leave the other undefined.
 */
#ifndef likely
#ifdef __GNUC__
#define likely(x) __builtin_expect(!!(x), 1)
#else
#define likely(x) (!!(x))
#endif
#endif

#ifndef unlikely
#ifdef __GNUC__
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define unlikely(x) (!!(x))
#endif
#endif
85+
7686
#if defined(USE_TLS) && defined(SMP)
7787
#define COMPILE_TLS
7888

@@ -2060,6 +2070,7 @@ struct release_t {
20602070
int hugetlb_allocated = 0;
20612071

20622072
static struct release_t release_info[NUM_BUFFERS];
2073+
static struct release_t *new_release_info;
20632074
static int release_pos = 0;
20642075

20652076
#if defined(OS_LINUX) && !defined(NO_WARMUP)
@@ -2110,8 +2121,13 @@ static void *alloc_mmap(void *address){
21102121
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
21112122
LOCK_COMMAND(&alloc_lock);
21122123
#endif
2124+
if (likely(release_pos < NUM_BUFFERS)) {
21132125
release_info[release_pos].address = map_address;
21142126
release_info[release_pos].func = alloc_mmap_free;
2127+
} else {
2128+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2129+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_mmap_free;
2130+
}
21152131
release_pos ++;
21162132
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
21172133
UNLOCK_COMMAND(&alloc_lock);
@@ -2274,8 +2290,13 @@ static void *alloc_mmap(void *address){
22742290
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
22752291
LOCK_COMMAND(&alloc_lock);
22762292
#endif
2293+
if (likely(release_pos < NUM_BUFFERS)) {
22772294
release_info[release_pos].address = map_address;
22782295
release_info[release_pos].func = alloc_mmap_free;
2296+
} else {
2297+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2298+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_mmap_free;
2299+
}
22792300
release_pos ++;
22802301
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
22812302
UNLOCK_COMMAND(&alloc_lock);
@@ -2307,8 +2328,13 @@ static void *alloc_malloc(void *address){
23072328
if (map_address == (void *)NULL) map_address = (void *)-1;
23082329

23092330
if (map_address != (void *)-1) {
2331+
if (likely(release_pos < NUM_BUFFERS)) {
23102332
release_info[release_pos].address = map_address;
23112333
release_info[release_pos].func = alloc_malloc_free;
2334+
} else {
2335+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2336+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_malloc_free;
2337+
}
23122338
release_pos ++;
23132339
}
23142340

@@ -2341,8 +2367,13 @@ static void *alloc_qalloc(void *address){
23412367
if (map_address == (void *)NULL) map_address = (void *)-1;
23422368

23432369
if (map_address != (void *)-1) {
2370+
if (likely(release_pos < NUM_BUFFERS)) {
23442371
release_info[release_pos].address = map_address;
23452372
release_info[release_pos].func = alloc_qalloc_free;
2373+
} else {
2374+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2375+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_qalloc_free;
2376+
}
23462377
release_pos ++;
23472378
}
23482379

@@ -2370,8 +2401,13 @@ static void *alloc_windows(void *address){
23702401
if (map_address == (void *)NULL) map_address = (void *)-1;
23712402

23722403
if (map_address != (void *)-1) {
2404+
if (likely(release_pos < NUM_BUFFERS)) {
23732405
release_info[release_pos].address = map_address;
23742406
release_info[release_pos].func = alloc_windows_free;
2407+
} else {
2408+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2409+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_windows_free;
2410+
}
23752411
release_pos ++;
23762412
}
23772413

@@ -2414,9 +2450,15 @@ static void *alloc_devicedirver(void *address){
24142450
fd, 0);
24152451

24162452
if (map_address != (void *)-1) {
2453+
if (likely(release_pos < NUM_BUFFERS)) {
24172454
release_info[release_pos].address = map_address;
24182455
release_info[release_pos].attr = fd;
24192456
release_info[release_pos].func = alloc_devicedirver_free;
2457+
} else {
2458+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2459+
new_release_info[release_pos-NUM_BUFFERS].attr = fd;
2460+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_devicedirver_free;
2461+
}
24202462
release_pos ++;
24212463
}
24222464

@@ -2450,9 +2492,15 @@ static void *alloc_shm(void *address){
24502492

24512493
shmctl(shmid, IPC_RMID, 0);
24522494

2495+
if (likely(release_pos < NUM_BUFFERS)) {
24532496
release_info[release_pos].address = map_address;
24542497
release_info[release_pos].attr = shmid;
24552498
release_info[release_pos].func = alloc_shm_free;
2499+
} else {
2500+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2501+
new_release_info[release_pos-NUM_BUFFERS].attr = shmid;
2502+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_shm_free;
2503+
}
24562504
release_pos ++;
24572505
}
24582506

@@ -2556,8 +2604,13 @@ static void *alloc_hugetlb(void *address){
25562604
#endif
25572605

25582606
if (map_address != (void *)-1){
2607+
if (likely(release_pos < NUM_BUFFERS)) {
25592608
release_info[release_pos].address = map_address;
25602609
release_info[release_pos].func = alloc_hugetlb_free;
2610+
} else {
2611+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2612+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_hugetlb_free;
2613+
}
25612614
release_pos ++;
25622615
}
25632616

@@ -2604,9 +2657,15 @@ static void *alloc_hugetlbfile(void *address){
26042657
fd, 0);
26052658

26062659
if (map_address != (void *)-1) {
2660+
if (likely(release_pos < NUM_BUFFERS)) {
26072661
release_info[release_pos].address = map_address;
26082662
release_info[release_pos].attr = fd;
26092663
release_info[release_pos].func = alloc_hugetlbfile_free;
2664+
} else {
2665+
new_release_info[release_pos-NUM_BUFFERS].address = map_address;
2666+
new_release_info[release_pos-NUM_BUFFERS].attr = fd;
2667+
new_release_info[release_pos-NUM_BUFFERS].func = alloc_hugetlbfile_free;
2668+
}
26102669
release_pos ++;
26112670
}
26122671

@@ -2636,8 +2695,25 @@ static volatile struct {
26362695

26372696
} memory[NUM_BUFFERS];
26382697

2639-
static int memory_initialized = 0;
2698+
static volatile struct newmemstruct
2699+
{
2700+
BLASULONG lock;
2701+
void *addr;
2702+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
2703+
int pos;
2704+
#endif
2705+
int used;
2706+
#ifndef __64BIT__
2707+
char dummy[48];
2708+
#else
2709+
char dummy[40];
2710+
#endif
2711+
2712+
};
2713+
static volatile struct newmemstruct *newmemory;
26402714

2715+
static int memory_initialized = 0;
2716+
static int memory_overflowed = 0;
26412717
/* Memory allocation routine */
26422718
/* procpos ... indicates where it comes from */
26432719
/* 0 : Level 3 functions */
@@ -2646,6 +2722,8 @@ static int memory_initialized = 0;
26462722

26472723
void *blas_memory_alloc(int procpos){
26482724

2725+
int i;
2726+
26492727
int position;
26502728
#if defined(WHEREAMI) && !defined(USE_OPENMP)
26512729
int mypos = 0;
@@ -2779,6 +2857,29 @@ void *blas_memory_alloc(int procpos){
27792857
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
27802858
UNLOCK_COMMAND(&alloc_lock);
27812859
#endif
2860+
if (memory_overflowed) {
2861+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2862+
LOCK_COMMAND(&alloc_lock);
2863+
#endif
2864+
do {
2865+
RMB;
2866+
#if defined(USE_OPENMP)
2867+
if (!newmemory[position-NUM_BUFFERS].used) {
2868+
blas_lock(&newmemory[position-NUM_BUFFERS].lock);
2869+
#endif
2870+
if (!newmemory[position-NUM_BUFFERS].used) goto allocation2;
2871+
2872+
#if defined(USE_OPENMP)
2873+
blas_unlock(&newmemory[position-NUM_BUFFERS].lock);
2874+
}
2875+
#endif
2876+
position ++;
2877+
2878+
} while (position < 512+NUM_BUFFERS);
2879+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
2880+
UNLOCK_COMMAND(&alloc_lock);
2881+
#endif
2882+
}
27822883
goto error;
27832884

27842885
allocation :
@@ -2883,6 +2984,91 @@ void *blas_memory_alloc(int procpos){
28832984
return (void *)memory[position].addr;
28842985

28852986
error:
2987+
if (memory_overflowed) goto terminate;
2988+
fprintf(stderr,"OpenBLAS warning: precompiled NUM_THREADS exceeded, adding auxiliary array for thread metadata.\n");
2989+
memory_overflowed=1;
2990+
new_release_info = (struct release_t*) malloc(512*sizeof(struct release_t));
2991+
newmemory = (struct newmemstruct*) malloc(512*sizeof(struct newmemstruct));
2992+
for (i = 0; i < 512; i++) {
2993+
newmemory[i].addr = (void *)0;
2994+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
2995+
newmemory[i].pos = -1;
2996+
#endif
2997+
newmemory[i].used = 0;
2998+
newmemory[i].lock = 0;
2999+
}
3000+
newmemory[position-NUM_BUFFERS].used = 1;
3001+
3002+
allocation2:
3003+
newmemory[position-NUM_BUFFERS].used = 1;
3004+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
3005+
UNLOCK_COMMAND(&alloc_lock);
3006+
#else
3007+
blas_unlock(&newmemory[position-NUM_BUFFERS].lock);
3008+
#endif
3009+
do {
3010+
#ifdef DEBUG
3011+
printf("Allocation Start : %lx\n", base_address);
3012+
#endif
3013+
3014+
map_address = (void *)-1;
3015+
3016+
func = &memoryalloc[0];
3017+
3018+
while ((func != NULL) && (map_address == (void *) -1)) {
3019+
3020+
map_address = (*func)((void *)base_address);
3021+
3022+
#ifdef ALLOC_DEVICEDRIVER
3023+
if ((*func == alloc_devicedirver) && (map_address == (void *)-1)) {
3024+
fprintf(stderr, "OpenBLAS Warning ... Physically contiguous allocation was failed.\n");
3025+
}
3026+
#endif
3027+
3028+
#ifdef ALLOC_HUGETLBFILE
3029+
if ((*func == alloc_hugetlbfile) && (map_address == (void *)-1)) {
3030+
#ifndef OS_WINDOWS
3031+
fprintf(stderr, "OpenBLAS Warning ... HugeTLB(File) allocation was failed.\n");
3032+
#endif
3033+
}
3034+
#endif
3035+
3036+
#if (defined ALLOC_SHM) && (defined OS_LINUX || defined OS_AIX || defined __sun__ || defined OS_WINDOWS)
3037+
if ((*func == alloc_hugetlb) && (map_address != (void *)-1)) hugetlb_allocated = 1;
3038+
#endif
3039+
3040+
func ++;
3041+
}
3042+
3043+
#ifdef DEBUG
3044+
printf(" Success -> %08lx\n", map_address);
3045+
#endif
3046+
if (((BLASLONG) map_address) == -1) base_address = 0UL;
3047+
3048+
if (base_address) base_address += BUFFER_SIZE + FIXED_PAGESIZE;
3049+
3050+
} while ((BLASLONG)map_address == -1);
3051+
3052+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
3053+
LOCK_COMMAND(&alloc_lock);
3054+
#endif
3055+
newmemory[position-NUM_BUFFERS].addr = map_address;
3056+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
3057+
UNLOCK_COMMAND(&alloc_lock);
3058+
#endif
3059+
3060+
#ifdef DEBUG
3061+
printf(" Mapping Succeeded. %p(%d)\n", (void *)newmemory[position-NUM_BUFFERS].addr, position);
3062+
#endif
3063+
3064+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
3065+
3066+
if (newmemory[position-NUM_BUFFERS].pos == -1) newmemory[position-NUM_BUFFERS].pos = mypos;
3067+
3068+
#endif
3069+
return (void *)newmemory[position-NUM_BUFFERS].addr;
3070+
3071+
terminate:
28863072
printf("OpenBLAS : Program is Terminated. Because you tried to allocate too many memory regions.\n");
28873073
printf("This library was built to support a maximum of %d threads - either rebuild OpenBLAS\n", NUM_BUFFERS);
28883074
printf("with a larger NUM_THREADS value or set the environment variable OPENBLAS_NUM_THREADS to\n");
@@ -2907,13 +3093,28 @@ void blas_memory_free(void *free_area){
29073093
while ((position < NUM_BUFFERS) && (memory[position].addr != free_area))
29083094
position++;
29093095

2910-
if (position >= NUM_BUFFERS) goto error;
3096+
if (position >= NUM_BUFFERS && !memory_overflowed) goto error;
29113097

29123098
#ifdef DEBUG
29133099
if (memory[position].addr != free_area) goto error;
29143100
printf(" Position : %d\n", position);
29153101
#endif
3102+
if (unlikely(memory_overflowed && position >= NUM_BUFFERS)) {
3103+
while ((position < NUM_BUFFERS+512) && (newmemory[position-NUM_BUFFERS].addr != free_area))
3104+
position++;
3105+
// arm: ensure all writes are finished before other thread takes this memory
3106+
WMB;
3107+
3108+
newmemory[position].used = 0;
3109+
#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP)
3110+
UNLOCK_COMMAND(&alloc_lock);
3111+
#endif
29163112

3113+
#ifdef DEBUG
3114+
printf("Unmap from overflow area succeeded.\n\n");
3115+
#endif
3116+
return;
3117+
} else {
29173118
// arm: ensure all writes are finished before other thread takes this memory
29183119
WMB;
29193120

@@ -2927,7 +3128,7 @@ void blas_memory_free(void *free_area){
29273128
#endif
29283129

29293130
return;
2930-
3131+
}
29313132
error:
29323133
printf("BLAS : Bad memory unallocation! : %4d %p\n", position, free_area);
29333134

@@ -2962,7 +3163,10 @@ void blas_shutdown(void){
29623163
LOCK_COMMAND(&alloc_lock);
29633164

29643165
for (pos = 0; pos < release_pos; pos ++) {
3166+
if (likely(pos < NUM_BUFFERS))
29653167
release_info[pos].func(&release_info[pos]);
3168+
else
3169+
new_release_info[pos-NUM_BUFFERS].func(&new_release_info[pos-NUM_BUFFERS]);
29663170
}
29673171

29683172
#ifdef SEEK_ADDRESS
@@ -2979,6 +3183,15 @@ void blas_shutdown(void){
29793183
#endif
29803184
memory[pos].lock = 0;
29813185
}
3186+
if (memory_overflowed)
3187+
for (pos = 0; pos < 512; pos ++){
3188+
newmemory[pos].addr = (void *)0;
3189+
newmemory[pos].used = 0;
3190+
#if defined(WHEREAMI) && !defined(USE_OPENMP)
3191+
newmemory[pos].pos = -1;
3192+
#endif
3193+
newmemory[pos].lock = 0;
3194+
}
29823195

29833196
UNLOCK_COMMAND(&alloc_lock);
29843197

0 commit comments

Comments
 (0)