Skip to content

Commit 386026a

Browse files
committed
Apply changes from PR
1 parent f6b9aa1 commit 386026a

File tree

4 files changed

+453
-243
lines changed

4 files changed

+453
-243
lines changed

denoiser.hpp

Lines changed: 138 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -168,21 +168,24 @@ struct AYSSchedule : SigmaSchedule {
168168
std::vector<float> inputs;
169169
std::vector<float> results(n + 1);
170170

171-
if (sd_version_is_sd2((SDVersion)version)) {
172-
LOG_WARN("AYS not designed for SD2.X models");
173-
} /* fallthrough */
174-
else if (sd_version_is_sd1((SDVersion)version)) {
175-
LOG_INFO("AYS using SD1.5 noise levels");
176-
inputs = noise_levels[0];
177-
} else if (sd_version_is_sdxl((SDVersion)version)) {
178-
LOG_INFO("AYS using SDXL noise levels");
179-
inputs = noise_levels[1];
180-
} else if (version == VERSION_SVD) {
181-
LOG_INFO("AYS using SVD noise levels");
182-
inputs = noise_levels[2];
183-
} else {
184-
LOG_ERROR("Version not compatible with AYS scheduler");
185-
return results;
171+
switch (version) {
172+
case VERSION_SD2: /* fallthrough */
173+
LOG_WARN("AYS not designed for SD2.X models");
174+
case VERSION_SD1:
175+
LOG_INFO("AYS using SD1.5 noise levels");
176+
inputs = noise_levels[0];
177+
break;
178+
case VERSION_SDXL:
179+
LOG_INFO("AYS using SDXL noise levels");
180+
inputs = noise_levels[1];
181+
break;
182+
case VERSION_SVD:
183+
LOG_INFO("AYS using SVD noise levels");
184+
inputs = noise_levels[2];
185+
break;
186+
default:
187+
LOG_ERROR("Version not compatable with AYS scheduler");
188+
return results;
186189
}
187190

188191
/* Stretches those pre-calculated reference levels out to the desired
@@ -232,6 +235,24 @@ struct GITSSchedule : SigmaSchedule {
232235
}
233236
};
234237

238+
struct SGMUniformSchedule : SigmaSchedule {
239+
std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
240+
241+
std::vector<float> result;
242+
if (n == 0) {
243+
result.push_back(0.0f);
244+
return result;
245+
}
246+
result.reserve(n + 1);
247+
int t_max = TIMESTEPS -1;
248+
float step = static_cast<float>(t_max) / static_cast<float>(n > 1 ? (n -1) : 1) ;
249+
for(uint32_t i=0; i<n; ++i) {
250+
result.push_back(t_to_sigma_func(t_max - step * i));
251+
}
252+
result.push_back(0.0f);
253+
return result;
254+
}
255+
};
235256
struct KarrasSchedule : SigmaSchedule {
236257
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
237258
// These *COULD* be function arguments here,
@@ -251,6 +272,36 @@ struct KarrasSchedule : SigmaSchedule {
251272
}
252273
};
253274

275+
struct SimpleSchedule : SigmaSchedule {
276+
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
277+
std::vector<float> result_sigmas;
278+
279+
if (n == 0) {
280+
return result_sigmas;
281+
}
282+
283+
result_sigmas.reserve(n + 1);
284+
285+
int model_sigmas_len = TIMESTEPS;
286+
287+
float step_factor = static_cast<float>(model_sigmas_len) / static_cast<float>(n);
288+
289+
for (uint32_t i = 0; i < n; ++i) {
290+
291+
int offset_from_start_of_py_array = static_cast<int>(static_cast<float>(i) * step_factor);
292+
int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
293+
294+
if (timestep_index < 0) {
295+
timestep_index = 0;
296+
}
297+
298+
result_sigmas.push_back(t_to_sigma(static_cast<float>(timestep_index)));
299+
}
300+
result_sigmas.push_back(0.0f);
301+
return result_sigmas;
302+
}
303+
};
304+
254305
struct Denoiser {
255306
std::shared_ptr<SigmaSchedule> schedule = std::make_shared<DiscreteSchedule>();
256307
virtual float sigma_min() = 0;
@@ -262,8 +313,39 @@ struct Denoiser {
262313
virtual ggml_tensor* inverse_noise_scaling(float sigma, ggml_tensor* latent) = 0;
263314

264315
virtual std::vector<float> get_sigmas(uint32_t n) {
265-
auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
266-
return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
316+
// Check if the current schedule is SGMUniformSchedule
317+
if (std::dynamic_pointer_cast<SGMUniformSchedule>(schedule)) {
318+
std::vector<float> sigs;
319+
sigs.reserve(n + 1);
320+
321+
if (n == 0) {
322+
sigs.push_back(0.0f);
323+
return sigs;
324+
}
325+
326+
// Use the Denoiser's own sigma_to_t and t_to_sigma methods
327+
float start_t_val = this->sigma_to_t(this->sigma_max());
328+
float end_t_val = this->sigma_to_t(this->sigma_min());
329+
330+
float dt_per_step;
331+
if (n > 0) {
332+
dt_per_step = (end_t_val - start_t_val) / static_cast<float>(n);
333+
} else {
334+
dt_per_step = 0.0f;
335+
}
336+
337+
for (uint32_t i = 0; i < n; ++i) {
338+
float current_t = start_t_val + static_cast<float>(i) * dt_per_step;
339+
sigs.push_back(this->t_to_sigma(current_t));
340+
}
341+
342+
sigs.push_back(0.0f);
343+
return sigs;
344+
345+
} else { // For all other schedules, use the existing virtual dispatch
346+
auto bound_t_to_sigma = std::bind(&Denoiser::t_to_sigma, this, std::placeholders::_1);
347+
return schedule->get_sigmas(n, sigma_min(), sigma_max(), bound_t_to_sigma);
348+
}
267349
}
268350
};
269351

@@ -343,32 +425,6 @@ struct CompVisVDenoiser : public CompVisDenoiser {
343425
}
344426
};
345427

346-
struct EDMVDenoiser : public CompVisVDenoiser {
347-
float min_sigma = 0.002;
348-
float max_sigma = 120.0;
349-
350-
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
351-
: min_sigma(min_sigma), max_sigma(max_sigma) {
352-
schedule = std::make_shared<ExponentialSchedule>();
353-
}
354-
355-
float t_to_sigma(float t) {
356-
return std::exp(t * 4 / (float)TIMESTEPS);
357-
}
358-
359-
float sigma_to_t(float s) {
360-
return 0.25 * std::log(s);
361-
}
362-
363-
float sigma_min() {
364-
return min_sigma;
365-
}
366-
367-
float sigma_max() {
368-
return max_sigma;
369-
}
370-
};
371-
372428
float time_snr_shift(float alpha, float t) {
373429
if (alpha == 1.0f) {
374430
return t;
@@ -1042,7 +1098,7 @@ static void sample_k_diffusion(sample_method_t method,
10421098
// also needed to invert the behavior of CompVisDenoiser
10431099
// (k-diffusion's LMSDiscreteScheduler)
10441100
float beta_start = 0.00085f;
1045-
float beta_end = 0.0120f;
1101+
float beta_end = 0.0120f;
10461102
std::vector<double> alphas_cumprod;
10471103
std::vector<double> compvis_sigmas;
10481104

@@ -1053,9 +1109,8 @@ static void sample_k_diffusion(sample_method_t method,
10531109
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
10541110
(1.0f -
10551111
std::pow(sqrtf(beta_start) +
1056-
(sqrtf(beta_end) - sqrtf(beta_start)) *
1057-
((float)i / (TIMESTEPS - 1)),
1058-
2));
1112+
(sqrtf(beta_end) - sqrtf(beta_start)) *
1113+
((float)i / (TIMESTEPS - 1)), 2));
10591114
compvis_sigmas[i] =
10601115
std::sqrt((1 - alphas_cumprod[i]) /
10611116
alphas_cumprod[i]);
@@ -1085,8 +1140,7 @@ static void sample_k_diffusion(sample_method_t method,
10851140
// - pred_prev_sample -> "x_t-1"
10861141
int timestep =
10871142
roundf(TIMESTEPS -
1088-
i * ((float)TIMESTEPS / steps)) -
1089-
1;
1143+
i * ((float)TIMESTEPS / steps)) - 1;
10901144
// 1. get previous step value (=t-1)
10911145
int prev_timestep = timestep - TIMESTEPS / steps;
10921146
// The sigma here is chosen to cause the
@@ -1111,9 +1165,10 @@ static void sample_k_diffusion(sample_method_t method,
11111165
float* vec_x = (float*)x->data;
11121166
for (int j = 0; j < ggml_nelements(x); j++) {
11131167
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
1114-
sigma;
1168+
sigma;
11151169
}
1116-
} else {
1170+
}
1171+
else {
11171172
// For the subsequent steps after the first one,
11181173
// at this point x = latents or x = sample, and
11191174
// needs to be prescaled with x <- sample / c_in
@@ -1151,8 +1206,9 @@ static void sample_k_diffusion(sample_method_t method,
11511206
float alpha_prod_t = alphas_cumprod[timestep];
11521207
// Note final_alpha_cumprod = alphas_cumprod[0] due to
11531208
// trailing timestep spacing
1154-
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1155-
float beta_prod_t = 1 - alpha_prod_t;
1209+
float alpha_prod_t_prev = prev_timestep >= 0 ?
1210+
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1211+
float beta_prod_t = 1 - alpha_prod_t;
11561212
// 3. compute predicted original sample from predicted
11571213
// noise also called "predicted x_0" of formula (12)
11581214
// from https://arxiv.org/pdf/2010.02502.pdf
@@ -1168,7 +1224,7 @@ static void sample_k_diffusion(sample_method_t method,
11681224
vec_pred_original_sample[j] =
11691225
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
11701226
std::sqrt(beta_prod_t) *
1171-
vec_model_output[j]) *
1227+
vec_model_output[j]) *
11721228
(1 / std::sqrt(alpha_prod_t));
11731229
}
11741230
}
@@ -1182,8 +1238,8 @@ static void sample_k_diffusion(sample_method_t method,
11821238
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
11831239
// sqrt(1 - alpha_t/alpha_t-1)
11841240
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
1185-
float variance = (beta_prod_t_prev / beta_prod_t) *
1186-
(1 - alpha_prod_t / alpha_prod_t_prev);
1241+
float variance = (beta_prod_t_prev / beta_prod_t) *
1242+
(1 - alpha_prod_t / alpha_prod_t_prev);
11871243
float std_dev_t = eta * std::sqrt(variance);
11881244
// 6. compute "direction pointing to x_t" of formula
11891245
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1202,8 +1258,8 @@ static void sample_k_diffusion(sample_method_t method,
12021258
std::pow(std_dev_t, 2)) *
12031259
vec_model_output[j];
12041260
vec_x[j] = std::sqrt(alpha_prod_t_prev) *
1205-
vec_pred_original_sample[j] +
1206-
pred_sample_direction;
1261+
vec_pred_original_sample[j] +
1262+
pred_sample_direction;
12071263
}
12081264
}
12091265
if (eta > 0) {
@@ -1231,7 +1287,7 @@ static void sample_k_diffusion(sample_method_t method,
12311287
// by Semi-Linear Consistency Function with Trajectory
12321288
// Mapping", arXiv:2402.19159 [cs.CV]
12331289
float beta_start = 0.00085f;
1234-
float beta_end = 0.0120f;
1290+
float beta_end = 0.0120f;
12351291
std::vector<double> alphas_cumprod;
12361292
std::vector<double> compvis_sigmas;
12371293

@@ -1242,9 +1298,8 @@ static void sample_k_diffusion(sample_method_t method,
12421298
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
12431299
(1.0f -
12441300
std::pow(sqrtf(beta_start) +
1245-
(sqrtf(beta_end) - sqrtf(beta_start)) *
1246-
((float)i / (TIMESTEPS - 1)),
1247-
2));
1301+
(sqrtf(beta_end) - sqrtf(beta_start)) *
1302+
((float)i / (TIMESTEPS - 1)), 2));
12481303
compvis_sigmas[i] =
12491304
std::sqrt((1 - alphas_cumprod[i]) /
12501305
alphas_cumprod[i]);
@@ -1259,10 +1314,13 @@ static void sample_k_diffusion(sample_method_t method,
12591314
for (int i = 0; i < steps; i++) {
12601315
// Analytic form for TCD timesteps
12611316
int timestep = TIMESTEPS - 1 -
1262-
(TIMESTEPS / original_steps) *
1263-
(int)floor(i * ((float)original_steps / steps));
1317+
(TIMESTEPS / original_steps) *
1318+
(int)floor(i * ((float)original_steps / steps));
12641319
// 1. get previous step value
1265-
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
1320+
int prev_timestep = i >= steps - 1 ? 0 :
1321+
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
1322+
(int)floor((i + 1) *
1323+
((float)original_steps / steps));
12661324
// Here timestep_s is tau_n' in Algorithm 4. The _s
12671325
// notation appears to be that from C. Lu,
12681326
// "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1279,9 +1337,10 @@ static void sample_k_diffusion(sample_method_t method,
12791337
float* vec_x = (float*)x->data;
12801338
for (int j = 0; j < ggml_nelements(x); j++) {
12811339
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
1282-
sigma;
1340+
sigma;
12831341
}
1284-
} else {
1342+
}
1343+
else {
12851344
float* vec_x = (float*)x->data;
12861345
for (int j = 0; j < ggml_nelements(x); j++) {
12871346
vec_x[j] *= std::sqrt(sigma * sigma + 1);
@@ -1314,14 +1373,15 @@ static void sample_k_diffusion(sample_method_t method,
13141373
// DPM-Solver. In fact, we have alpha_{t_n} =
13151374
// \sqrt{\hat{alpha_n}}, [...]"
13161375
float alpha_prod_t = alphas_cumprod[timestep];
1317-
float beta_prod_t = 1 - alpha_prod_t;
1376+
float beta_prod_t = 1 - alpha_prod_t;
13181377
// Note final_alpha_cumprod = alphas_cumprod[0] since
13191378
// TCD is always "trailing"
1320-
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
1379+
float alpha_prod_t_prev = prev_timestep >= 0 ?
1380+
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
13211381
// The subscript _s are the only portion in this
13221382
// section (2) unique to TCD
13231383
float alpha_prod_s = alphas_cumprod[timestep_s];
1324-
float beta_prod_s = 1 - alpha_prod_s;
1384+
float beta_prod_s = 1 - alpha_prod_s;
13251385
// 3. Compute the predicted noised sample x_s based on
13261386
// the model parameterization
13271387
//
@@ -1336,7 +1396,7 @@ static void sample_k_diffusion(sample_method_t method,
13361396
vec_pred_original_sample[j] =
13371397
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
13381398
std::sqrt(beta_prod_t) *
1339-
vec_model_output[j]) *
1399+
vec_model_output[j]) *
13401400
(1 / std::sqrt(alpha_prod_t));
13411401
}
13421402
}
@@ -1358,9 +1418,9 @@ static void sample_k_diffusion(sample_method_t method,
13581418
// pred_epsilon = model_output
13591419
vec_x[j] =
13601420
std::sqrt(alpha_prod_s) *
1361-
vec_pred_original_sample[j] +
1421+
vec_pred_original_sample[j] +
13621422
std::sqrt(beta_prod_s) *
1363-
vec_model_output[j];
1423+
vec_model_output[j];
13641424
}
13651425
}
13661426
// 4. Sample and inject noise z ~ N(0, I) for
@@ -1376,7 +1436,7 @@ static void sample_k_diffusion(sample_method_t method,
13761436
// In this case, x is still pred_noised_sample,
13771437
// continue in-place
13781438
ggml_tensor_set_f32_randn(noise, rng);
1379-
float* vec_x = (float*)x->data;
1439+
float* vec_x = (float*)x->data;
13801440
float* vec_noise = (float*)noise->data;
13811441
for (int j = 0; j < ggml_nelements(x); j++) {
13821442
// Corresponding to (35) in Zheng et
@@ -1385,10 +1445,10 @@ static void sample_k_diffusion(sample_method_t method,
13851445
vec_x[j] =
13861446
std::sqrt(alpha_prod_t_prev /
13871447
alpha_prod_s) *
1388-
vec_x[j] +
1448+
vec_x[j] +
13891449
std::sqrt(1 - alpha_prod_t_prev /
1390-
alpha_prod_s) *
1391-
vec_noise[j];
1450+
alpha_prod_s) *
1451+
vec_noise[j];
13921452
}
13931453
}
13941454
}
@@ -1400,4 +1460,4 @@ static void sample_k_diffusion(sample_method_t method,
14001460
}
14011461
}
14021462

1403-
#endif // __DENOISER_HPP__
1463+
#endif // __DENOISER_HPP__

0 commit comments

Comments
 (0)