Skip to content

Commit 61d97f4

Browse files
authored
decode_gif: add transparency support and image offsets (#8419)
1 parent 51429c2 commit 61d97f4

File tree

2 files changed

+61
-36
lines changed

2 files changed

+61
-36
lines changed

test/test_image.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
551551
write_png(img, write_path)
552552

553553

554-
@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
554+
@pytest.mark.parametrize(
555+
"name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
556+
)
555557
@pytest.mark.parametrize("scripted", (True, False))
556558
def test_decode_gif(tmpdir, name, scripted):
557559
# Using test images from GIFLIB
@@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
560562
# We're not testing against "welcome2" because PIL and GIFLIB disagree on what
561563
# the background color should be (likely a difference in the way they handle
562564
# transparency?)
565+
# 'earth' image is from wikipedia, licensed under CC BY-SA 3.0
566+
# https://creativecommons.org/licenses/by-sa/3.0/
567+
# it allows us to properly test for transparency, TOP-LEFT offsets, and
568+
# disposal modes.
563569

564570
path = tmpdir / f"{name}.gif"
565-
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
571+
if name == "earth":
572+
url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
573+
else:
574+
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
566575
with open(path, "wb") as f:
567576
f.write(requests.get(url).content)
568577

torchvision/csrc/io/image/cpu/decode_gif.cpp

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
8686
// This check should already be done within DGifSlurp(), just to be safe
8787
TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");
8888

89-
// Note:
90-
// The GIF format has this notion of "canvas" and "canvas size", where each
91-
// image could be displayed on the canvas at different offsets, forming a
92-
// mosaic/picture wall like so:
93-
//
94-
// <--- canvas W --->
95-
// ------------------------ ^
96-
// | | | |
97-
// | img1 | img3 | |
98-
// | |------------| canvas H
99-
// |---------- | |
100-
// | img2 | img4 | |
101-
// | | | |
102-
// ------------------------ v
103-
// The GifLib docs indicate that this is mostly vestigial
104-
// (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
105-
// modern viewers ignore the canvas size as well as image offsets. Hence,
106-
// we're ignoring that too:
107-
// - We're ignoring the canvas width and height and assume that the shape of
108-
// the canvas and of all images is the shape of the first image.
109-
// - We're enforcing that all images have the same shape.
110-
// - Left and Top offsets of each image are ignored as well and assumed to be
111-
// 0.
112-
113-
auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
114-
auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
89+
GifColorType bg = {0, 0, 0};
90+
if (gifFile->SColorMap) {
91+
bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
92+
}
93+
94+
// The GIFLIB docs say that the canvas's height and width are potentially
95+
// ignored by modern viewers, so to be on the safe side we set the output
96+
// height to max(canvas_height, first_image_height). Same for width.
97+
// https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
98+
auto out_h =
99+
std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
100+
auto out_w =
101+
std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);
115102

116103
// We output a channels-last tensor for consistency with other image decoders.
117104
// Torchvision's resize tends to be faster on uint8 channels-last tensors.
@@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
121108
auto out = torch::empty(
122109
{int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
123110
auto out_a = out.accessor<uint8_t, 4>();
124-
125111
for (int i = 0; i < num_images; i++) {
126112
const SavedImage& img = gifFile->SavedImages[i];
127-
const GifImageDesc& desc = img.ImageDesc;
128-
TORCH_CHECK(
129-
desc.Width == out_w && desc.Height == out_h,
130-
"All images in the gif should have the same dimensions.");
131113

114+
GraphicsControlBlock gcb;
115+
DGifSavedExtensionToGCB(gifFile, i, &gcb);
116+
117+
const GifImageDesc& desc = img.ImageDesc;
132118
const ColorMapObject* cmap =
133119
desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
134120
TORCH_CHECK(
135121
cmap != nullptr,
136122
"Global and local color maps are missing. This should never happen!");
137123

124+
// When going from one image to another, there is a "disposal method" which
125+
// specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
126+
// the current image should essentially be drawn on top of the previous
127+
// canvas. The pixels of that previous canvas will appear on the new one if
128+
// either:
129+
// - a pixel is transparent in the current image
130+
// - the current image is smaller than the canvas, hence exposing its pixels
131+
// The "background" disposal method means that the current canvas should be
132+
// set to the background color.
133+
// We only support these 2 modes and default to "background" when the
134+
// disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
135+
// which according to GIFLIB is not widely supported.
136+
// (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
137+
if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
138+
out[i] = out[i - 1];
139+
} else {
140+
// Background. If bg wasn't defined, it will be (0, 0, 0)
141+
for (int h = 0; h < gifFile->SHeight; h++) {
142+
for (int w = 0; w < gifFile->SWidth; w++) {
143+
out_a[i][0][h][w] = bg.Red;
144+
out_a[i][1][h][w] = bg.Green;
145+
out_a[i][2][h][w] = bg.Blue;
146+
}
147+
}
148+
}
149+
138150
for (int h = 0; h < desc.Height; h++) {
139151
for (int w = 0; w < desc.Width; w++) {
140152
auto c = img.RasterBits[h * desc.Width + w];
153+
if (c == gcb.TransparentColor) {
154+
continue;
155+
}
141156
GifColorType rgb = cmap->Colors[c];
142-
out_a[i][0][h][w] = rgb.Red;
143-
out_a[i][1][h][w] = rgb.Green;
144-
out_a[i][2][h][w] = rgb.Blue;
157+
out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
158+
out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
159+
out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
145160
}
146161
}
147162
}
163+
148164
out = out.squeeze(0); // remove batch dim if there's only one image
149165

150166
DGifCloseFile(gifFile, &error);

0 commit comments

Comments
 (0)