From 6885d97c8834f3cf29b89380a5cd4cc3d05a9d79 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 9 Nov 2021 19:59:36 -0800 Subject: [PATCH 1/3] hoist some calculations out --- .../Quantization/WuQuantizer{TPixel}.cs | 26 +++++++++++++++++++ .../Codecs/EncodeIndexedPng.cs | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index bf4a5ca41e..9b82631632 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -418,18 +418,44 @@ private void Get3DMoments(MemoryAllocator allocator) for (int r = 1; r < IndexCount; r++) { volumeSpan.Clear(); +#if ALTERNATE + int ind1_r = (r << ((IndexBits * 2) + IndexAlphaBits)) + + (r << (IndexBits + IndexAlphaBits + 1)) + + (r << (IndexBits * 2)) + + (r << (IndexBits + 1)) + + r; +#endif for (int g = 1; g < IndexCount; g++) { areaSpan.Clear(); +#if ALTERNATE + int ind1_g = ind1_r + + (g << (IndexBits + IndexAlphaBits)) + + (g << IndexBits) + + g; + + int r_g = r + g; +#endif + for (int b = 1; b < IndexCount; b++) { +#if ALTERNATE + int ind1_b = ind1_g + + ((r_g + b) << IndexAlphaBits) + + b; +#endif + Moment line = default; for (int a = 1; a < IndexAlphaCount; a++) { +#if ALTERNATE + int ind1 = ind1_b + a; +#else int ind1 = GetPaletteIndex(r, g, b, a); +#endif line += momentSpan[ind1]; areaSpan[a] += line; diff --git a/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs b/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs index de8d412029..b339222627 100644 --- a/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs +++ b/tests/ImageSharp.Benchmarks/Codecs/EncodeIndexedPng.cs @@ -85,7 +85,7 @@ public void PngCoreWu() public void PngCoreWuNoDither() { using var memoryStream = new MemoryStream(); - var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }) }; + var options = new PngEncoder { Quantizer = new WuQuantizer(new QuantizerOptions { Dither = null }), ColorType = PngColorType.Palette }; this.bmpCore.SaveAsPng(memoryStream, options); } } From c0d5dfbac59d1ce6937f3be61ccb0841f59e702d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 10 Nov 2021 23:55:01 -0800 Subject: [PATCH 2/3] Better variable names --- .../Quantization/WuQuantizer{TPixel}.cs | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index 9b82631632..2824f53eef 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -417,45 +417,40 @@ private void Get3DMoments(MemoryAllocator allocator) for (int r = 1; r < IndexCount; r++) { - volumeSpan.Clear(); -#if ALTERNATE - int ind1_r = (r << ((IndexBits * 2) + IndexAlphaBits)) + + // Currently, RyuJIT hoists the invariants of multi-level nested loop only to the + // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420 + // To ensure the calculation doesn't happen repeatedly, hoist some of the calculations + // in the form of ind1* manually. + int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) + (r << (IndexBits + IndexAlphaBits + 1)) + (r << (IndexBits * 2)) + (r << (IndexBits + 1)) + r; -#endif + + volumeSpan.Clear(); for (int g = 1; g < IndexCount; g++) { - areaSpan.Clear(); - -#if ALTERNATE - int ind1_g = ind1_r + + int ind1G = ind1R + (g << (IndexBits + IndexAlphaBits)) + (g << IndexBits) + g; - int r_g = r + g; -#endif + + areaSpan.Clear(); for (int b = 1; b < IndexCount; b++) { -#if ALTERNATE - int ind1_b = ind1_g + + int ind1B = ind1G + ((r_g + b) << IndexAlphaBits) + b; -#endif Moment line = default; for (int a = 1; a < IndexAlphaCount; a++) { -#if ALTERNATE - int ind1 = ind1_b + a; -#else - int ind1 = GetPaletteIndex(r, g, b, a); -#endif + int ind1 = ind1B + a; + line += momentSpan[ind1]; areaSpan[a] += line; From 57357b076a939011b3a80f5f83700e90e0899c9a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 12 Nov 2021 07:41:08 -0800 Subject: [PATCH 3/3] Optimize Mark --- .../Quantization/WuQuantizer{TPixel}.cs | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs index 2824f53eef..4218810d77 100644 --- a/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs +++ b/src/ImageSharp/Processing/Processors/Quantization/WuQuantizer{TPixel}.cs @@ -649,13 +649,35 @@ private void Mark(ref Box cube, byte label) for (int r = cube.RMin + 1; r <= cube.RMax; r++) { + // Currently, RyuJIT hoists the invariants of multi-level nested loop only to the + // immediate outer loop. See https://github.com/dotnet/runtime/issues/61420 + // To ensure the calculation doesn't happen repeatedly, hoist some of the calculations + // in the form of ind1* manually. + int ind1R = (r << ((IndexBits * 2) + IndexAlphaBits)) + + (r << (IndexBits + IndexAlphaBits + 1)) + + (r << (IndexBits * 2)) + + (r << (IndexBits + 1)) + + r; + for (int g = cube.GMin + 1; g <= cube.GMax; g++) { + int ind1G = ind1R + + (g << (IndexBits + IndexAlphaBits)) + + (g << IndexBits) + + g; + int r_g = r + g; + for (int b = cube.BMin + 1; b <= cube.BMax; b++) { + int ind1B = ind1G + + ((r_g + b) << IndexAlphaBits) + + b; + for (int a = cube.AMin + 1; a <= cube.AMax; a++) { - tagSpan[GetPaletteIndex(r, g, b, a)] = label; + int index = ind1B + a; + + tagSpan[index] = label; } } }