diff --git a/ggml.c b/ggml.c
index 793b67f4c70..8f672871695 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2195,6 +2195,7 @@ struct ggml_context {
     bool no_alloc;
     bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
+    bool use_hwaccel;

     int n_objects;

@@ -2754,6 +2755,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
         /*.no_alloc         =*/ params.no_alloc,
         /*.no_alloc_save    =*/ params.no_alloc,
+        /*.use_hwaccel      =*/ params.use_hwaccel,
         /*.n_objects        =*/ 0,
         /*.objects_begin    =*/ NULL,
         /*.objects_end      =*/ NULL,
@@ -2985,9 +2987,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
+        /*.rank         =*/ n_dims,
         /*.padding      =*/ { 0 },
     };

+    if (ctx->use_hwaccel)
+        result->backend = GGML_BACKEND_TYPE_GPU;
+
     // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
     //ggml_assert_aligned(result->data);

diff --git a/ggml.h b/ggml.h
index abe3767f224..76bc63dea71 100644
--- a/ggml.h
+++ b/ggml.h
@@ -591,7 +591,9 @@ extern "C" {
         void * extra; // extra things e.g. for ggml-cuda.cu

-        char padding[8];
+        int32_t rank;
+
+        char padding[20];
     };

     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -657,6 +659,7 @@ extern "C" {
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
         bool   no_alloc;   // don't allocate memory for the tensor data
+        bool   use_hwaccel;
     };

diff --git a/whisper.cpp b/whisper.cpp
index b9e1ef2ced1..0cabf3273c0 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -6518,6 +6518,9 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
         /*.no_alloc   =*/ false,
     };

+#ifdef GGML_USE_QNN
+    gparams.use_hwaccel = true;
+#endif
     struct ggml_context * ctx0 = ggml_init(gparams);

     struct ggml_tensor * a = ggml_new_tensor_2d(ctx0, wtype, N, N);
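
For context, a minimal sketch of how a caller would opt into the new flag. This is illustrative only: the pool size and tensor shape are made up, and the `GGML_USE_QNN` guard simply mirrors the whisper.cpp hunk above; all other names come from the patched headers.

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size    =*/ 16*1024*1024, // illustrative pool size
        /*.mem_buffer  =*/ NULL,         // NULL => ggml allocates internally
        /*.no_alloc    =*/ false,
        /*.use_hwaccel =*/ false,
    };

#ifdef GGML_USE_QNN
    // mirrors the whisper.cpp hunk: request hardware acceleration for this context
    params.use_hwaccel = true;
#endif

    struct ggml_context * ctx = ggml_init(params);

    // with this patch, the tensor records rank == 2, and because the context
    // was created with use_hwaccel, its backend is GGML_BACKEND_TYPE_GPU
    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    (void) t;

    ggml_free(ctx);
    return 0;
}
```

Note that `rank` is set unconditionally in `ggml_new_tensor_impl`, while the backend override applies only to tensors created in a context initialized with `use_hwaccel = true`.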