Skip to content

Commit e5a39ff

Browse files
committed
Restored blas_server.c
1 parent 553031f commit e5a39ff

File tree

1 file changed

+91
-145
lines changed

1 file changed

+91
-145
lines changed

driver/others/blas_server.c

Lines changed: 91 additions & 145 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,6 @@ int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
115115

116116
int blas_omp_threads_local = 1;
117117

118-
static void * blas_thread_buffer[MAX_CPU_NUMBER];
119-
120118
/* Local Variables */
121119
#if defined(USE_PTHREAD_LOCK)
122120
static pthread_mutex_t server_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -192,10 +190,6 @@ static int main_status[MAX_CPU_NUMBER];
192190
BLASLONG exit_time[MAX_CPU_NUMBER];
193191
#endif
194192

195-
//Prototypes
196-
static void exec_threads(int , blas_queue_t *, int);
197-
static void adjust_thread_buffers();
198-
199193
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
200194

201195
if (!(mode & BLAS_COMPLEX)){
@@ -381,6 +375,7 @@ static void* blas_thread_server(void *arg){
381375
/* Thread identifier */
382376
BLASLONG cpu = (BLASLONG)arg;
383377
unsigned int last_tick;
378+
void *buffer, *sa, *sb;
384379
blas_queue_t *queue;
385380

386381
blas_queue_t *tscq;
@@ -400,6 +395,8 @@ blas_queue_t *tscq;
400395
main_status[cpu] = MAIN_ENTER;
401396
#endif
402397

398+
buffer = blas_memory_alloc(2);
399+
403400
#ifdef SMP_DEBUG
404401
fprintf(STDERR, "Server[%2ld] Thread has just been spawned!\n", cpu);
405402
#endif
@@ -460,8 +457,92 @@ blas_queue_t *tscq;
460457
#endif
461458

462459
if (queue) {
460+
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = (int (*)(blas_arg_t *, void *, void *, void *, void *, BLASLONG))queue -> routine;
463461

464-
exec_threads(cpu, queue, 0);
462+
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);
463+
464+
sa = queue -> sa;
465+
sb = queue -> sb;
466+
467+
#ifdef SMP_DEBUG
468+
if (queue -> args) {
469+
fprintf(STDERR, "Server[%2ld] Calculation started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
470+
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
471+
}
472+
#endif
473+
474+
#ifdef CONSISTENT_FPCSR
475+
#ifdef __aarch64__
476+
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
477+
#else
478+
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
479+
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
480+
#endif
481+
#endif
482+
483+
#ifdef MONITOR
484+
main_status[cpu] = MAIN_RUNNING1;
485+
#endif
486+
487+
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
488+
489+
if (sb == NULL) {
490+
if (!(queue -> mode & BLAS_COMPLEX)){
491+
#ifdef EXPRECISION
492+
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
493+
sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
494+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
495+
} else
496+
#endif
497+
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
498+
#ifdef BUILD_DOUBLE
499+
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
500+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
501+
#endif
502+
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
503+
#ifdef BUILD_SINGLE
504+
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
505+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
506+
#endif
507+
} else {
508+
/* Other types in future */
509+
}
510+
} else {
511+
#ifdef EXPRECISION
512+
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
513+
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
514+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
515+
} else
516+
#endif
517+
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
518+
#ifdef BUILD_COMPLEX16
519+
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
520+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
521+
#endif
522+
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
523+
#ifdef BUILD_COMPLEX
524+
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
525+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
526+
#endif
527+
} else {
528+
/* Other types in future */
529+
}
530+
}
531+
queue->sb=sb;
532+
}
533+
534+
#ifdef MONITOR
535+
main_status[cpu] = MAIN_RUNNING2;
536+
#endif
537+
538+
if (queue -> mode & BLAS_LEGACY) {
539+
legacy_exec(routine, queue -> mode, queue -> args, sb);
540+
} else
541+
if (queue -> mode & BLAS_PTHREAD) {
542+
void (*pthreadcompat)(void *) = (void(*)(void*))queue -> routine;
543+
(pthreadcompat)(queue -> args);
544+
} else
545+
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
465546

466547
#ifdef SMP_DEBUG
467548
fprintf(STDERR, "Server[%2ld] Calculation finished!\n", cpu);
@@ -476,7 +557,7 @@ blas_queue_t *tscq;
476557
MB;
477558
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)0);
478559

479-
560+
480561
}
481562

482563
#ifdef MONITOR
@@ -499,6 +580,8 @@ blas_queue_t *tscq;
499580
fprintf(STDERR, "Server[%2ld] Shutdown!\n", cpu);
500581
#endif
501582

583+
blas_memory_free(buffer);
584+
502585
//pthread_exit(NULL);
503586

504587
return NULL;
@@ -580,9 +663,6 @@ int blas_thread_init(void){
580663

581664
LOCK_COMMAND(&server_lock);
582665

583-
// Adjust thread buffers
584-
adjust_thread_buffers();
585-
586666
if (!blas_server_avail){
587667

588668
thread_timeout_env=openblas_thread_timeout();
@@ -813,18 +893,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
813893
fprintf(STDERR, "Exec_blas is called. Number of executing threads : %ld\n", num);
814894
#endif
815895

816-
//Redirect to caller's callback routine
817-
if (openblas_threads_callback_) {
818-
int buf_index = 0;
819-
#ifndef USE_SIMPLE_THREADED_LEVEL3
820-
for (int i = 0; i < num; i ++)
821-
queue[i].position = i;
822-
#endif
823-
openblas_threads_callback_(1, (openblas_dojob_callback) exec_threads, num, sizeof(blas_queue_t), (void*) queue, buf_index);
824-
return 0;
825-
}
826-
827-
828896
#ifdef __ELF__
829897
if (omp_in_parallel && (num > 1)) {
830898
if (omp_in_parallel() > 0) {
@@ -998,14 +1066,6 @@ int BLASFUNC(blas_thread_shutdown)(void){
9981066

9991067
LOCK_COMMAND(&server_lock);
10001068

1001-
//Free buffers allocated for threads
1002-
for(i=0; i<MAX_CPU_NUMBER; i++){
1003-
if(blas_thread_buffer[i]!=NULL){
1004-
blas_memory_free(blas_thread_buffer[i]);
1005-
blas_thread_buffer[i]=NULL;
1006-
}
1007-
}
1008-
10091069
if (blas_server_avail) {
10101070

10111071
for (i = 0; i < blas_num_threads - 1; i++) {
@@ -1042,118 +1102,4 @@ int BLASFUNC(blas_thread_shutdown)(void){
10421102
return 0;
10431103
}
10441104

1045-
static void adjust_thread_buffers() {
1046-
1047-
int i=0;
1048-
1049-
//adjust buffer for each thread
1050-
for(i=0; i < blas_cpu_number; i++){
1051-
if(blas_thread_buffer[i] == NULL){
1052-
blas_thread_buffer[i] = blas_memory_alloc(2);
1053-
}
1054-
}
1055-
for(; i < MAX_CPU_NUMBER; i++){
1056-
if(blas_thread_buffer[i] != NULL){
1057-
blas_memory_free(blas_thread_buffer[i]);
1058-
blas_thread_buffer[i] = NULL;
1059-
}
1060-
}
1061-
}
1062-
1063-
static void exec_threads(int cpu, blas_queue_t *queue, int buf_index)
1064-
{
1065-
1066-
void *buffer, *sa, *sb;
1067-
1068-
buffer = blas_thread_buffer[cpu];
1069-
1070-
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = (int (*)(blas_arg_t *, void *, void *, void *, void *, BLASLONG))queue -> routine;
1071-
1072-
atomic_store_queue(&thread_status[cpu].queue, (blas_queue_t *)1);
1073-
1074-
sa = queue -> sa;
1075-
sb = queue -> sb;
1076-
1077-
#ifdef SMP_DEBUG
1078-
if (queue -> args) {
1079-
fprintf(STDERR, "Server[%2ld] Calculation started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
1080-
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
1081-
}
1082-
#endif
1083-
1084-
#ifdef CONSISTENT_FPCSR
1085-
#ifdef __aarch64__
1086-
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode));
1087-
#else
1088-
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
1089-
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
1090-
#endif
1091-
#endif
1092-
1093-
#ifdef MONITOR
1094-
main_status[cpu] = MAIN_RUNNING1;
1095-
#endif
1096-
1097-
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
1098-
1099-
if (sb == NULL) {
1100-
if (!(queue -> mode & BLAS_COMPLEX)){
1101-
#ifdef EXPRECISION
1102-
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
1103-
sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble)
1104-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
1105-
} else
1106-
#endif
1107-
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
1108-
#ifdef BUILD_DOUBLE
1109-
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
1110-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
11111105
#endif
1112-
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
1113-
#ifdef BUILD_SINGLE
1114-
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
1115-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
1116-
#endif
1117-
} else {
1118-
/* Other types in future */
1119-
}
1120-
} else {
1121-
#ifdef EXPRECISION
1122-
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
1123-
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
1124-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
1125-
} else
1126-
#endif
1127-
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
1128-
#ifdef BUILD_COMPLEX16
1129-
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
1130-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
1131-
#endif
1132-
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
1133-
#ifdef BUILD_COMPLEX
1134-
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
1135-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
1136-
#endif
1137-
} else {
1138-
/* Other types in future */
1139-
}
1140-
}
1141-
queue->sb=sb;
1142-
}
1143-
1144-
#ifdef MONITOR
1145-
main_status[cpu] = MAIN_RUNNING2;
1146-
#endif
1147-
1148-
if (queue -> mode & BLAS_LEGACY) {
1149-
legacy_exec(routine, queue -> mode, queue -> args, sb);
1150-
} else
1151-
if (queue -> mode & BLAS_PTHREAD) {
1152-
void (*pthreadcompat)(void *) = (void(*)(void*))queue -> routine;
1153-
(pthreadcompat)(queue -> args);
1154-
} else
1155-
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
1156-
1157-
}
1158-
1159-
#endif

0 commit comments

Comments
 (0)