4 files changed, +15 -16 lines
@@ -89,6 +89,19 @@
 class qnn_instance;
 
 // TODO: should be removed because this is a workaround method during the development stage
+// a minor modification is required during the development stage to validate the QNN backend on an Android phone:
+//
+// modify from
+//
+//     static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
+//
+// to
+//
+//     void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
+//
+// in source file ggml.c#L16156
+//
+// this workaround will not be needed once the final QNN backend is complete
 extern "C" void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
 #if (defined __ANDROID__) || (defined ANDROID) // Qualcomm's QNN can also run on Windows on ARM (aka WoA)
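
Note: the comment block added above describes a bring-up hack: un-`static` ggml_compute_forward in ggml.c so code outside that file can reach the CPU implementation. A minimal sketch of how backend code might use the exported symbol as a fallback for ops it cannot offload yet is below; the helper name and the zeroed params struct are illustrative assumptions, not code from this patch:

    // hypothetical fallback helper, not part of this patch:
    // route a tensor the QNN backend cannot handle back to the CPU path
    static void ggml_qnn_fallback(struct ggml_tensor * tensor) {
        struct ggml_compute_params params = {0}; // stub: real code must set the
                                                 // task type, ith/nth and wdata/wsize
        ggml_compute_forward(&params, tensor);   // works only while the symbol is exported
    }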
@@ -46,10 +46,6 @@ GGML_API void ggml_backend_qnn_get_device_description(int device, char
 GGML_API ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num);
 
 
-// temporary API, should be removed in the future
-GGML_API bool ggml_qnn_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
-
-
 #ifdef __cplusplus
 }
 #endif
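
With the temporary ggml_qnn_compute_forward entry point removed, callers are expected to drive the backend through the generic ggml-backend interface instead. A rough usage sketch under that assumption (device index 0 and the data path are placeholders; error handling omitted):

    ggml_backend_t backend = ggml_backend_qnn_init(0, "/data/data/com.ggml.llamacpp/");
    if (backend != nullptr) {
        ggml_backend_graph_compute(backend, graph); // graph: a ggml_cgraph built beforehand
        ggml_backend_free(backend);
    }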
@@ -16153,8 +16153,7 @@ static void ggml_compute_forward_cross_entropy_loss_back(
 
 /////////////////////////////////
 
-// workaround for the Qualcomm QNN backend
-void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     GGML_ASSERT(params);
 
     if (tensor->op == GGML_OP_NONE || ggml_is_empty(tensor)) {
@@ -15403,7 +15403,7 @@ struct llama_context * llama_new_context_with_model(
 #elif defined(GGML_USE_QNN)
     if (model->n_gpu_layers > 0) {
         // the second param is the package name of the Android app; it can be obtained via JNI from the Java layer
-        ggml_backend_t backend = ggml_backend_qnn_init(QNN_CPU, "/data/data/com.ggml.llamacpp/");
+        ggml_backend_t backend = ggml_backend_qnn_init(model->main_gpu, "/data/data/com.ggml.llamacpp/");
         if (nullptr == backend) {
             LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
             llama_free(ctx);
@@ -17577,14 +17577,6 @@ void llama_reset_timings(struct llama_context * ctx) {
     ctx->t_p_eval_us = ctx->n_p_eval = 0;
 }
 
-static int llama_has_qnn(void) {
-#ifdef GGML_USE_QNN
-    return 1;
-#else
-    return 0;
-#endif
-}
-
 const char * llama_print_system_info(void) {
     static std::string s;
 
@@ -17606,7 +17598,6 @@ const char * llama_print_system_info(void) {
     s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
     s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
     s += "MATMUL_INT8 = " + std::to_string(ggml_cpu_has_matmul_int8()) + " | ";
-    s += "QNN = " + std::to_string(llama_has_qnn()) + " | ";
 
     return s.c_str();
 }
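
The comment in the first llama.cpp hunk notes that the second argument of ggml_backend_qnn_init is the app's /data/data/<package>/ path, passed down from Java via JNI. A hedged sketch of such a bridge follows; the class path, method name, and header name are hypothetical, and on the Java side the path could come from Context.getFilesDir() or similar:

    #include <jni.h>
    #include "ggml-qnn.h" // assumed header declaring ggml_backend_qnn_init

    // hypothetical JNI entry point; the Java layer passes the device index and
    // the app data directory, and receives an opaque backend handle (0 on failure)
    extern "C" JNIEXPORT jlong JNICALL
    Java_com_example_llama_LlamaBridge_initQnnBackend(JNIEnv * env, jobject /*thiz*/,
                                                      jint device, jstring data_path) {
        const char * path = env->GetStringUTFChars(data_path, nullptr);
        ggml_backend_t backend = ggml_backend_qnn_init(device, path);
        env->ReleaseStringUTFChars(data_path, path);
        return reinterpret_cast<jlong>(backend);
    }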