File tree Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -539,8 +539,8 @@ void ggml_metal_graph_compute(
539
539
540
540
id <MTLComputeCommandEncoder > encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
541
541
542
- const int node_start = (cb_idx + 0 ) * n_nodes_per_cb;
543
- const int node_end = ( cb_idx == n_cb - 1 ) ? n_nodes : (cb_idx + 1 ) * n_nodes_per_cb;
542
+ const int node_start = (cb_idx + 0 ) * n_nodes_per_cb;
543
+ const int node_end = MIN (( cb_idx == n_cb - 1 ) ? n_nodes : (cb_idx + 1 ) * n_nodes_per_cb, n_nodes) ;
544
544
545
545
for (int ind = node_start; ind < node_end; ++ind) {
546
546
const int i = has_concur ? ctx->concur_list [ind] : ind;
Original file line number Diff line number Diff line change @@ -87,7 +87,12 @@ kernel void kernel_gelu(
87
87
device float * dst,
88
88
uint tpig[[thread_position_in_grid]]) {
89
89
float x = src0[tpig];
90
- dst[tpig] = 0 .5f *x*(1 .0f + tanh (SQRT_2_OVER_PI*x*(1 .0f + GELU_COEF_A*x*x)));
90
+
91
+ // BEWARE !!!
92
+ // Simply using "tanh" instead of "precise::tanh" will sometimes result in NaNs!
93
+ // This was observed with Falcon 7B and 40B models
94
+ //
95
+ dst[tpig] = 0 .5f *x*(1 .0f + precise::tanh (SQRT_2_OVER_PI*x*(1 .0f + GELU_COEF_A*x*x)));
91
96
}
92
97
93
98
kernel void kernel_soft_max (
You can’t perform that action at this time.
0 commit comments