@@ -168,21 +168,24 @@ struct AYSSchedule : SigmaSchedule {
168
168
std::vector<float > inputs;
169
169
std::vector<float > results (n + 1 );
170
170
171
- if (sd_version_is_sd2 ((SDVersion)version)) {
172
- LOG_WARN (" AYS not designed for SD2.X models" );
173
- } /* fallthrough */
174
- else if (sd_version_is_sd1 ((SDVersion)version)) {
175
- LOG_INFO (" AYS using SD1.5 noise levels" );
176
- inputs = noise_levels[0 ];
177
- } else if (sd_version_is_sdxl ((SDVersion)version)) {
178
- LOG_INFO (" AYS using SDXL noise levels" );
179
- inputs = noise_levels[1 ];
180
- } else if (version == VERSION_SVD) {
181
- LOG_INFO (" AYS using SVD noise levels" );
182
- inputs = noise_levels[2 ];
183
- } else {
184
- LOG_ERROR (" Version not compatible with AYS scheduler" );
185
- return results;
171
+ switch (version) {
172
+ case VERSION_SD2: /* fallthrough */
173
+ LOG_WARN (" AYS not designed for SD2.X models" );
174
+ case VERSION_SD1:
175
+ LOG_INFO (" AYS using SD1.5 noise levels" );
176
+ inputs = noise_levels[0 ];
177
+ break ;
178
+ case VERSION_SDXL:
179
+ LOG_INFO (" AYS using SDXL noise levels" );
180
+ inputs = noise_levels[1 ];
181
+ break ;
182
+ case VERSION_SVD:
183
+ LOG_INFO (" AYS using SVD noise levels" );
184
+ inputs = noise_levels[2 ];
185
+ break ;
186
+ default :
187
+ LOG_ERROR (" Version not compatable with AYS scheduler" );
188
+ return results;
186
189
}
187
190
188
191
/* Stretches those pre-calculated reference levels out to the desired
@@ -232,6 +235,24 @@ struct GITSSchedule : SigmaSchedule {
232
235
}
233
236
};
234
237
238
+ struct SGMUniformSchedule : SigmaSchedule {
239
+ std::vector<float > get_sigmas (uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
240
+
241
+ std::vector<float > result;
242
+ if (n == 0 ) {
243
+ result.push_back (0 .0f );
244
+ return result;
245
+ }
246
+ result.reserve (n + 1 );
247
+ int t_max = TIMESTEPS -1 ;
248
+ float step = static_cast <float >(t_max) / static_cast <float >(n > 1 ? (n -1 ) : 1 ) ;
249
+ for (uint32_t i=0 ; i<n; ++i) {
250
+ result.push_back (t_to_sigma_func (t_max - step * i));
251
+ }
252
+ result.push_back (0 .0f );
253
+ return result;
254
+ }
255
+ };
235
256
struct KarrasSchedule : SigmaSchedule {
236
257
std::vector<float > get_sigmas (uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
237
258
// These *COULD* be function arguments here,
@@ -251,6 +272,36 @@ struct KarrasSchedule : SigmaSchedule {
251
272
}
252
273
};
253
274
275
+ struct SimpleSchedule : SigmaSchedule {
276
+ std::vector<float > get_sigmas (uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
277
+ std::vector<float > result_sigmas;
278
+
279
+ if (n == 0 ) {
280
+ return result_sigmas;
281
+ }
282
+
283
+ result_sigmas.reserve (n + 1 );
284
+
285
+ int model_sigmas_len = TIMESTEPS;
286
+
287
+ float step_factor = static_cast <float >(model_sigmas_len) / static_cast <float >(n);
288
+
289
+ for (uint32_t i = 0 ; i < n; ++i) {
290
+
291
+ int offset_from_start_of_py_array = static_cast <int >(static_cast <float >(i) * step_factor);
292
+ int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
293
+
294
+ if (timestep_index < 0 ) {
295
+ timestep_index = 0 ;
296
+ }
297
+
298
+ result_sigmas.push_back (t_to_sigma (static_cast <float >(timestep_index)));
299
+ }
300
+ result_sigmas.push_back (0 .0f );
301
+ return result_sigmas;
302
+ }
303
+ };
304
+
254
305
struct Denoiser {
255
306
std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
256
307
virtual float sigma_min () = 0;
@@ -262,8 +313,39 @@ struct Denoiser {
262
313
virtual ggml_tensor* inverse_noise_scaling (float sigma, ggml_tensor* latent) = 0;
263
314
264
315
virtual std::vector<float > get_sigmas (uint32_t n) {
265
- auto bound_t_to_sigma = std::bind (&Denoiser::t_to_sigma, this , std::placeholders::_1);
266
- return schedule->get_sigmas (n, sigma_min (), sigma_max (), bound_t_to_sigma);
316
+ // Check if the current schedule is SGMUniformSchedule
317
+ if (std::dynamic_pointer_cast<SGMUniformSchedule>(schedule)) {
318
+ std::vector<float > sigs;
319
+ sigs.reserve (n + 1 );
320
+
321
+ if (n == 0 ) {
322
+ sigs.push_back (0 .0f );
323
+ return sigs;
324
+ }
325
+
326
+ // Use the Denoiser's own sigma_to_t and t_to_sigma methods
327
+ float start_t_val = this ->sigma_to_t (this ->sigma_max ());
328
+ float end_t_val = this ->sigma_to_t (this ->sigma_min ());
329
+
330
+ float dt_per_step;
331
+ if (n > 0 ) {
332
+ dt_per_step = (end_t_val - start_t_val) / static_cast <float >(n);
333
+ } else {
334
+ dt_per_step = 0 .0f ;
335
+ }
336
+
337
+ for (uint32_t i = 0 ; i < n; ++i) {
338
+ float current_t = start_t_val + static_cast <float >(i) * dt_per_step;
339
+ sigs.push_back (this ->t_to_sigma (current_t ));
340
+ }
341
+
342
+ sigs.push_back (0 .0f );
343
+ return sigs;
344
+
345
+ } else { // For all other schedules, use the existing virtual dispatch
346
+ auto bound_t_to_sigma = std::bind (&Denoiser::t_to_sigma, this , std::placeholders::_1);
347
+ return schedule->get_sigmas (n, sigma_min (), sigma_max (), bound_t_to_sigma);
348
+ }
267
349
}
268
350
};
269
351
@@ -343,32 +425,6 @@ struct CompVisVDenoiser : public CompVisDenoiser {
343
425
}
344
426
};
345
427
346
- struct EDMVDenoiser : public CompVisVDenoiser {
347
- float min_sigma = 0.002 ;
348
- float max_sigma = 120.0 ;
349
-
350
- EDMVDenoiser (float min_sigma = 0.002 , float max_sigma = 120.0 )
351
- : min_sigma(min_sigma), max_sigma(max_sigma) {
352
- schedule = std::make_shared<ExponentialSchedule>();
353
- }
354
-
355
- float t_to_sigma (float t) {
356
- return std::exp (t * 4 / (float )TIMESTEPS);
357
- }
358
-
359
- float sigma_to_t (float s) {
360
- return 0.25 * std::log (s);
361
- }
362
-
363
- float sigma_min () {
364
- return min_sigma;
365
- }
366
-
367
- float sigma_max () {
368
- return max_sigma;
369
- }
370
- };
371
-
372
428
float time_snr_shift (float alpha, float t) {
373
429
if (alpha == 1 .0f ) {
374
430
return t;
@@ -1042,7 +1098,7 @@ static void sample_k_diffusion(sample_method_t method,
1042
1098
// also needed to invert the behavior of CompVisDenoiser
1043
1099
// (k-diffusion's LMSDiscreteScheduler)
1044
1100
float beta_start = 0 .00085f ;
1045
- float beta_end = 0 .0120f ;
1101
+ float beta_end = 0 .0120f ;
1046
1102
std::vector<double > alphas_cumprod;
1047
1103
std::vector<double > compvis_sigmas;
1048
1104
@@ -1053,9 +1109,8 @@ static void sample_k_diffusion(sample_method_t method,
1053
1109
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1054
1110
(1 .0f -
1055
1111
std::pow (sqrtf (beta_start) +
1056
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1057
- ((float )i / (TIMESTEPS - 1 )),
1058
- 2 ));
1112
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1113
+ ((float )i / (TIMESTEPS - 1 )), 2 ));
1059
1114
compvis_sigmas[i] =
1060
1115
std::sqrt ((1 - alphas_cumprod[i]) /
1061
1116
alphas_cumprod[i]);
@@ -1085,8 +1140,7 @@ static void sample_k_diffusion(sample_method_t method,
1085
1140
// - pred_prev_sample -> "x_t-1"
1086
1141
int timestep =
1087
1142
roundf (TIMESTEPS -
1088
- i * ((float )TIMESTEPS / steps)) -
1089
- 1 ;
1143
+ i * ((float )TIMESTEPS / steps)) - 1 ;
1090
1144
// 1. get previous step value (=t-1)
1091
1145
int prev_timestep = timestep - TIMESTEPS / steps;
1092
1146
// The sigma here is chosen to cause the
@@ -1111,9 +1165,10 @@ static void sample_k_diffusion(sample_method_t method,
1111
1165
float * vec_x = (float *)x->data ;
1112
1166
for (int j = 0 ; j < ggml_nelements (x); j++) {
1113
1167
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1114
- sigma;
1168
+ sigma;
1115
1169
}
1116
- } else {
1170
+ }
1171
+ else {
1117
1172
// For the subsequent steps after the first one,
1118
1173
// at this point x = latents or x = sample, and
1119
1174
// needs to be prescaled with x <- sample / c_in
@@ -1151,8 +1206,9 @@ static void sample_k_diffusion(sample_method_t method,
1151
1206
float alpha_prod_t = alphas_cumprod[timestep];
1152
1207
// Note final_alpha_cumprod = alphas_cumprod[0] due to
1153
1208
// trailing timestep spacing
1154
- float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1155
- float beta_prod_t = 1 - alpha_prod_t ;
1209
+ float alpha_prod_t_prev = prev_timestep >= 0 ?
1210
+ alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1211
+ float beta_prod_t = 1 - alpha_prod_t ;
1156
1212
// 3. compute predicted original sample from predicted
1157
1213
// noise also called "predicted x_0" of formula (12)
1158
1214
// from https://arxiv.org/pdf/2010.02502.pdf
@@ -1168,7 +1224,7 @@ static void sample_k_diffusion(sample_method_t method,
1168
1224
vec_pred_original_sample[j] =
1169
1225
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1170
1226
std::sqrt (beta_prod_t ) *
1171
- vec_model_output[j]) *
1227
+ vec_model_output[j]) *
1172
1228
(1 / std::sqrt (alpha_prod_t ));
1173
1229
}
1174
1230
}
@@ -1182,8 +1238,8 @@ static void sample_k_diffusion(sample_method_t method,
1182
1238
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
1183
1239
// sqrt(1 - alpha_t/alpha_t-1)
1184
1240
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1185
- float variance = (beta_prod_t_prev / beta_prod_t ) *
1186
- (1 - alpha_prod_t / alpha_prod_t_prev);
1241
+ float variance = (beta_prod_t_prev / beta_prod_t ) *
1242
+ (1 - alpha_prod_t / alpha_prod_t_prev);
1187
1243
float std_dev_t = eta * std::sqrt (variance);
1188
1244
// 6. compute "direction pointing to x_t" of formula
1189
1245
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1202,8 +1258,8 @@ static void sample_k_diffusion(sample_method_t method,
1202
1258
std::pow (std_dev_t , 2 )) *
1203
1259
vec_model_output[j];
1204
1260
vec_x[j] = std::sqrt (alpha_prod_t_prev) *
1205
- vec_pred_original_sample[j] +
1206
- pred_sample_direction;
1261
+ vec_pred_original_sample[j] +
1262
+ pred_sample_direction;
1207
1263
}
1208
1264
}
1209
1265
if (eta > 0 ) {
@@ -1231,7 +1287,7 @@ static void sample_k_diffusion(sample_method_t method,
1231
1287
// by Semi-Linear Consistency Function with Trajectory
1232
1288
// Mapping", arXiv:2402.19159 [cs.CV]
1233
1289
float beta_start = 0 .00085f ;
1234
- float beta_end = 0 .0120f ;
1290
+ float beta_end = 0 .0120f ;
1235
1291
std::vector<double > alphas_cumprod;
1236
1292
std::vector<double > compvis_sigmas;
1237
1293
@@ -1242,9 +1298,8 @@ static void sample_k_diffusion(sample_method_t method,
1242
1298
(i == 0 ? 1 .0f : alphas_cumprod[i - 1 ]) *
1243
1299
(1 .0f -
1244
1300
std::pow (sqrtf (beta_start) +
1245
- (sqrtf (beta_end) - sqrtf (beta_start)) *
1246
- ((float )i / (TIMESTEPS - 1 )),
1247
- 2 ));
1301
+ (sqrtf (beta_end) - sqrtf (beta_start)) *
1302
+ ((float )i / (TIMESTEPS - 1 )), 2 ));
1248
1303
compvis_sigmas[i] =
1249
1304
std::sqrt ((1 - alphas_cumprod[i]) /
1250
1305
alphas_cumprod[i]);
@@ -1259,10 +1314,13 @@ static void sample_k_diffusion(sample_method_t method,
1259
1314
for (int i = 0 ; i < steps; i++) {
1260
1315
// Analytic form for TCD timesteps
1261
1316
int timestep = TIMESTEPS - 1 -
1262
- (TIMESTEPS / original_steps) *
1263
- (int )floor (i * ((float )original_steps / steps));
1317
+ (TIMESTEPS / original_steps) *
1318
+ (int )floor (i * ((float )original_steps / steps));
1264
1319
// 1. get previous step value
1265
- int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int )floor ((i + 1 ) * ((float )original_steps / steps));
1320
+ int prev_timestep = i >= steps - 1 ? 0 :
1321
+ TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1322
+ (int )floor ((i + 1 ) *
1323
+ ((float )original_steps / steps));
1266
1324
// Here timestep_s is tau_n' in Algorithm 4. The _s
1267
1325
// notation appears to be that from C. Lu,
1268
1326
// "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1279,9 +1337,10 @@ static void sample_k_diffusion(sample_method_t method,
1279
1337
float * vec_x = (float *)x->data ;
1280
1338
for (int j = 0 ; j < ggml_nelements (x); j++) {
1281
1339
vec_x[j] *= std::sqrt (sigma * sigma + 1 ) /
1282
- sigma;
1340
+ sigma;
1283
1341
}
1284
- } else {
1342
+ }
1343
+ else {
1285
1344
float * vec_x = (float *)x->data ;
1286
1345
for (int j = 0 ; j < ggml_nelements (x); j++) {
1287
1346
vec_x[j] *= std::sqrt (sigma * sigma + 1 );
@@ -1314,14 +1373,15 @@ static void sample_k_diffusion(sample_method_t method,
1314
1373
// DPM-Solver. In fact, we have alpha_{t_n} =
1315
1374
// \sqrt{\hat{alpha_n}}, [...]"
1316
1375
float alpha_prod_t = alphas_cumprod[timestep];
1317
- float beta_prod_t = 1 - alpha_prod_t ;
1376
+ float beta_prod_t = 1 - alpha_prod_t ;
1318
1377
// Note final_alpha_cumprod = alphas_cumprod[0] since
1319
1378
// TCD is always "trailing"
1320
- float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1379
+ float alpha_prod_t_prev = prev_timestep >= 0 ?
1380
+ alphas_cumprod[prev_timestep] : alphas_cumprod[0 ];
1321
1381
// The subscript _s are the only portion in this
1322
1382
// section (2) unique to TCD
1323
1383
float alpha_prod_s = alphas_cumprod[timestep_s];
1324
- float beta_prod_s = 1 - alpha_prod_s;
1384
+ float beta_prod_s = 1 - alpha_prod_s;
1325
1385
// 3. Compute the predicted noised sample x_s based on
1326
1386
// the model parameterization
1327
1387
//
@@ -1336,7 +1396,7 @@ static void sample_k_diffusion(sample_method_t method,
1336
1396
vec_pred_original_sample[j] =
1337
1397
(vec_x[j] / std::sqrt (sigma * sigma + 1 ) -
1338
1398
std::sqrt (beta_prod_t ) *
1339
- vec_model_output[j]) *
1399
+ vec_model_output[j]) *
1340
1400
(1 / std::sqrt (alpha_prod_t ));
1341
1401
}
1342
1402
}
@@ -1358,9 +1418,9 @@ static void sample_k_diffusion(sample_method_t method,
1358
1418
// pred_epsilon = model_output
1359
1419
vec_x[j] =
1360
1420
std::sqrt (alpha_prod_s) *
1361
- vec_pred_original_sample[j] +
1421
+ vec_pred_original_sample[j] +
1362
1422
std::sqrt (beta_prod_s) *
1363
- vec_model_output[j];
1423
+ vec_model_output[j];
1364
1424
}
1365
1425
}
1366
1426
// 4. Sample and inject noise z ~ N(0, I) for
@@ -1376,7 +1436,7 @@ static void sample_k_diffusion(sample_method_t method,
1376
1436
// In this case, x is still pred_noised_sample,
1377
1437
// continue in-place
1378
1438
ggml_tensor_set_f32_randn (noise, rng);
1379
- float * vec_x = (float *)x->data ;
1439
+ float * vec_x = (float *)x->data ;
1380
1440
float * vec_noise = (float *)noise->data ;
1381
1441
for (int j = 0 ; j < ggml_nelements (x); j++) {
1382
1442
// Corresponding to (35) in Zheng et
@@ -1385,10 +1445,10 @@ static void sample_k_diffusion(sample_method_t method,
1385
1445
vec_x[j] =
1386
1446
std::sqrt (alpha_prod_t_prev /
1387
1447
alpha_prod_s) *
1388
- vec_x[j] +
1448
+ vec_x[j] +
1389
1449
std::sqrt (1 - alpha_prod_t_prev /
1390
- alpha_prod_s) *
1391
- vec_noise[j];
1450
+ alpha_prod_s) *
1451
+ vec_noise[j];
1392
1452
}
1393
1453
}
1394
1454
}
@@ -1400,4 +1460,4 @@ static void sample_k_diffusion(sample_method_t method,
1400
1460
}
1401
1461
}
1402
1462
1403
- #endif // __DENOISER_HPP__
1463
+ #endif // __DENOISER_HPP__
0 commit comments