Skip to content

Add DYNAMIC_ARCH support for ARM64 #1829

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,13 @@ CCOMMON_OPT += $(XCCOMMON_OPT)
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
endif

ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
endif

# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
ifndef DYNAMIC_CORE
override DYNAMIC_ARCH=
Expand Down
1 change: 0 additions & 1 deletion cpuid_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,6 @@ void get_cpuconfig(void)
break;

case CPU_THUNDERX:
printf("#define ARMV8\n");
printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
Expand Down
8 changes: 8 additions & 0 deletions driver/others/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ endif
# COMMONOBJS += info.$(SUFFIX)

ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH),arm64)
COMMONOBJS += dynamic_arm64.$(SUFFIX)
else
COMMONOBJS += dynamic.$(SUFFIX)
endif
else
COMMONOBJS += parameter.$(SUFFIX)
endif
Expand Down Expand Up @@ -71,7 +75,11 @@ BLAS_SERVER = blas_server.c
endif

ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH),arm64)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
endif
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
endif
Expand Down
198 changes: 198 additions & 0 deletions driver/others/dynamic_arm64.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "common.h"
#include <asm/hwcap.h>
#include <sys/auxv.h>

extern gotoblas_t gotoblas_ARMV8;
extern gotoblas_t gotoblas_CORTEXA57;
extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;

extern void openblas_warning(int verbose, const char * msg);

#define NUM_CORETYPES 4

/*
* In case asm/hwcap.h is outdated on the build system, make sure
* that HWCAP_CPUID is defined
*/
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif

#define get_cpu_ftr(id, var) ({ \
asm("mrs %0, "#id : "=r" (var)); \
})

static char *corename[] = {
"armv8",
"cortexa57",
"thunderx",
"thunderx2t99",
"unknown"
};

char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
return corename[NUM_CORETYPES];
}

static gotoblas_t *force_coretype(char *coretype) {
int i ;
int found = -1;
char message[128];

for ( i=0 ; i < NUM_CORETYPES; i++)
{
if (!strncasecmp(coretype, corename[i], 20))
{
found = i;
break;
}
}

switch (found)
{
case 0: return (&gotoblas_ARMV8);
case 1: return (&gotoblas_CORTEXA57);
case 2: return (&gotoblas_THUNDERX);
case 3: return (&gotoblas_THUNDERX2T99);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
return NULL;
}

static gotoblas_t *get_coretype(void) {
int implementer, variant, part, arch, revision, midr_el1;

if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
char coremsg[128];
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
openblas_warning(1, coremsg);
return NULL;
}

get_cpu_ftr(MIDR_EL1, midr_el1);
/*
* MIDR_EL1
*
* 31 24 23 20 19 16 15 4 3 0
* -----------------------------------------------------------------
* | Implementer | Variant | Architecture | Part Number | Revision |
* -----------------------------------------------------------------
*/
implementer = (midr_el1 >> 24) & 0xFF;
part = (midr_el1 >> 4) & 0xFFF;

switch(implementer)
{
case 0x41: // ARM
switch (part)
{
case 0xd07: // Cortex A57
case 0xd08: // Cortex A72
case 0xd03: // Cortex A53
return &gotoblas_CORTEXA57;
}
break;
case 0x42: // Broadcom
switch (part)
{
case 0x516: // Vulcan
return &gotoblas_THUNDERX2T99;
}
break;
case 0x43: // Cavium
switch (part)
{
case 0x0a1: // ThunderX
return &gotoblas_THUNDERX;
case 0x0af: // ThunderX2
return &gotoblas_THUNDERX2T99;
}
break;
}
return NULL;
}

void gotoblas_dynamic_init(void) {

char coremsg[128];
char coren[22];
char *p;

if (gotoblas) return;

p = getenv("OPENBLAS_CORETYPE");
if ( p )
{
gotoblas = force_coretype(p);
}
else
{
gotoblas = get_coretype();
}

if (gotoblas == NULL)
{
snprintf(coremsg, 128, "Falling back to generic ARMV8 core\n");
openblas_warning(1, coremsg);
gotoblas = &gotoblas_ARMV8;
}

if (gotoblas && gotoblas->init) {
strncpy(coren, gotoblas_corename(), 20);
sprintf(coremsg, "Core: %s\n", coren);
openblas_warning(2, coremsg);
gotoblas -> init();
} else {
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
exit(1);
}

}

void gotoblas_dynamic_quit(void) {
gotoblas = NULL;
}
27 changes: 0 additions & 27 deletions driver/others/parameter.c
Original file line number Diff line number Diff line change
Expand Up @@ -730,35 +730,8 @@ void blas_set_parameter(void){

#if defined(ARCH_ARM64)

#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
unsigned long dgemm_prefetch_size_a;
unsigned long dgemm_prefetch_size_b;
unsigned long dgemm_prefetch_size_c;
#endif

void blas_set_parameter(void)
{
#if defined(VULCAN) || defined(THUNDERX2T99) || defined(ARMV8)
dgemm_p = 160;
dgemm_q = 128;
dgemm_r = 4096;

sgemm_p = 128;
sgemm_q = 352;
sgemm_r = 4096;

cgemm_p = 128;
cgemm_q = 224;
cgemm_r = 4096;

zgemm_p = 128;
zgemm_q = 112;
zgemm_r = 4096;

dgemm_prefetch_size_a = 3584;
dgemm_prefetch_size_b = 512;
dgemm_prefetch_size_c = 128;
#endif
}

#endif
4 changes: 4 additions & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ lsame.$(SUFFIX): $(KERNELDIR)/$(LSAME_KERNEL)
$(CC) -c $(CFLAGS) -DF_INTERFACE $< -o $(@F)

setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h
ifeq ($(USE_GEMM3M), 1)
$(CC) -c $(CFLAGS) -DUSE_GEMM3M $< -o $@
else
$(CC) -c $(CFLAGS) $< -o $@
endif

setparam$(TSUFFIX).c : setparam-ref.c
sed 's/TS/$(TSUFFIX)/g' $< > $(@F)
Expand Down
Loading