Skip to content

Commit e4d2d1a

Browse files
authored
Add GIF decoder (#8406)
1 parent 1644fff commit e4d2d1a

22 files changed

+2644
-24
lines changed

.github/scripts/unittest.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ eval "$($(which conda) shell.bash hook)" && conda deactivate && conda activate c
99

1010
echo '::group::Install testing utilities'
1111
# TODO: remove the <8 constraint on pytest when https://github.com/pytorch/vision/issues/8238 is closed
12-
pip install --progress-bar=off "pytest<8" pytest-mock pytest-cov expecttest!=0.2.0
12+
pip install --progress-bar=off "pytest<8" pytest-mock pytest-cov expecttest!=0.2.0 requests
1313
echo '::endgroup::'
1414

1515
python test/smoke_test.py

.github/workflows/lint.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ jobs:
6363
6464
echo '::group::Lint C source'
6565
set +e
66-
./.github/scripts/run-clang-format.py -r torchvision/csrc --clang-format-executable ./clang-format
66+
./.github/scripts/run-clang-format.py -r torchvision/csrc --clang-format-executable ./clang-format --exclude "torchvision/csrc/io/image/cpu/giflib/*"
6767
6868
if [ $? -ne 0 ]; then
6969
git --no-pager diff

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ include(GNUInstallDirs)
8080
include(CMakePackageConfigHelpers)
8181

8282
set(TVCPP torchvision/csrc)
83-
list(APPEND ALLOW_LISTED ${TVCPP} ${TVCPP}/io/image ${TVCPP}/io/image/cpu ${TVCPP}/models ${TVCPP}/ops
83+
list(APPEND ALLOW_LISTED ${TVCPP} ${TVCPP}/io/image ${TVCPP}/io/image/cpu ${TVCPP}/io/image/cpu/giflib ${TVCPP}/models ${TVCPP}/ops
8484
${TVCPP}/ops/autograd ${TVCPP}/ops/cpu ${TVCPP}/io/image/cuda)
8585
if(WITH_CUDA)
8686
list(APPEND ALLOW_LISTED ${TVCPP}/ops/cuda ${TVCPP}/ops/autocast)

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ We don't officially support building from source using `pip`, but _if_ you do, y
7474
#### Other development dependencies (some of these are needed to run tests):
7575

7676
```
77-
pip install expecttest flake8 typing mypy pytest pytest-mock scipy
77+
pip install expecttest flake8 typing mypy pytest pytest-mock scipy requests
7878
```
7979

8080
## Development Process

docs/source/io.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Images
1919
encode_jpeg
2020
decode_jpeg
2121
write_jpeg
22+
decode_gif
2223
encode_png
2324
decode_png
2425
write_png

setup.py

+15-12
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,11 @@ def get_extensions():
332332
image_macros += [("NVJPEG_FOUND", str(int(use_nvjpeg)))]
333333

334334
image_path = os.path.join(extensions_dir, "io", "image")
335-
image_src = glob.glob(os.path.join(image_path, "*.cpp")) + glob.glob(os.path.join(image_path, "cpu", "*.cpp"))
335+
image_src = (
336+
glob.glob(os.path.join(image_path, "*.cpp"))
337+
+ glob.glob(os.path.join(image_path, "cpu", "*.cpp"))
338+
+ glob.glob(os.path.join(image_path, "cpu", "giflib", "*.c"))
339+
)
336340

337341
if is_rocm_pytorch:
338342
image_src += glob.glob(os.path.join(image_path, "hip", "*.cpp"))
@@ -341,18 +345,17 @@ def get_extensions():
341345
else:
342346
image_src += glob.glob(os.path.join(image_path, "cuda", "*.cpp"))
343347

344-
if use_png or use_jpeg:
345-
ext_modules.append(
346-
extension(
347-
"torchvision.image",
348-
image_src,
349-
include_dirs=image_include + include_dirs + [image_path],
350-
library_dirs=image_library + library_dirs,
351-
define_macros=image_macros,
352-
libraries=image_link_flags,
353-
extra_compile_args=extra_compile_args,
354-
)
348+
ext_modules.append(
349+
extension(
350+
"torchvision.image",
351+
image_src,
352+
include_dirs=image_include + include_dirs + [image_path],
353+
library_dirs=image_library + library_dirs,
354+
define_macros=image_macros,
355+
libraries=image_link_flags,
356+
extra_compile_args=extra_compile_args,
355357
)
358+
)
356359

357360
# Locating ffmpeg
358361
ffmpeg_exe = shutil.which("ffmpeg")

test/test_image.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
import glob
22
import io
33
import os
4+
import re
45
import sys
56
from pathlib import Path
67

78
import numpy as np
89
import pytest
10+
import requests
911
import torch
1012
import torchvision.transforms.functional as F
1113
from common_utils import assert_equal, needs_cuda
12-
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps
14+
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
1315
from torchvision.io.image import (
1416
_read_png_16,
17+
decode_gif,
1518
decode_image,
1619
decode_jpeg,
1720
decode_png,
@@ -548,5 +551,46 @@ def test_pathlib_support(tmpdir):
548551
write_png(img, write_path)
549552

550553

554+
@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
def test_decode_gif(tmpdir, name):
    """Check that torchvision and PIL decode GIFLIB's sample GIFs identically."""
    # Using test images from GIFLIB
    # https://sourceforge.net/p/giflib/code/ci/master/tree/pic/, we assert PIL
    # and torchvision decoded outputs are equal.
    # We're not testing against "welcome2" because PIL and GIFLIB disagree on what
    # the background color should be (likely a difference in the way they handle
    # transparency?)

    path = tmpdir / f"{name}.gif"
    url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
    # Bound the download time and fail loudly on HTTP errors: otherwise an error
    # page would be written to disk and surface as a confusing decoding failure.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open(path, "wb") as f:
        f.write(response.content)

    tv_out = read_image(path)
    if tv_out.ndim == 3:
        # Single-image GIFs come back without a batch dim; add it so that both
        # cases can be compared frame by frame below.
        tv_out = tv_out[None]

    assert tv_out.is_contiguous(memory_format=torch.channels_last)

    # For some reason, not using Image.open() as a CM causes "ResourceWarning: unclosed file"
    with Image.open(path) as pil_img:
        pil_seq = ImageSequence.Iterator(pil_img)

        for pil_frame, tv_frame in zip(pil_seq, tv_out):
            pil_frame = F.pil_to_tensor(pil_frame.convert("RGB"))
            # Decoding is expected to be bit-exact, hence the zero tolerances.
            torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)
581+
582+
583+
def test_decode_gif_errors():
    """Check that decode_gif rejects invalid inputs with the expected messages."""
    encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)

    # Each entry pairs an invalid input with the pattern its error must match.
    invalid_cases = (
        (encoded_data[None], "Input tensor must be 1-dimensional"),
        (encoded_data.float(), "Input tensor must have uint8 data type"),
        (encoded_data[::2], "Input tensor must be contiguous"),
        (encoded_data, re.escape("DGifOpenFileName() failed - 103")),
    )
    for bad_input, expected_pattern in invalid_cases:
        with pytest.raises(RuntimeError, match=expected_pattern):
            decode_gif(bad_input)
593+
594+
551595
if __name__ == "__main__":
552596
pytest.main([__file__])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#include "decode_gif.h"
2+
#include <cstring>
3+
#include "giflib/gif_lib.h"
4+
5+
namespace vision {
6+
namespace image {
7+
8+
// Read cursor over the encoded GIF bytes held by the input tensor. A pointer
// to an instance is handed to GIFLIB as user data so that the read callback
// knows where to resume reading.
struct reader_helper_t {
  const uint8_t* encoded_data; // start of the input tensor's bytes
  size_t encoded_data_size; // total number of bytes in the input tensor
  size_t num_bytes_read; // how many of those bytes were consumed so far
};
13+
14+
// That function is used by GIFLIB routines to read the encoded bytes.
15+
// This reads `len` bytes and writes them into `buf`. The data is read from the
16+
// input tensor passed to decode_gif() starting at the `num_bytes_read`
17+
// position.
18+
int read_from_tensor(GifFileType* gifFile, GifByteType* buf, int len) {
19+
// the UserData field was set in DGifOpen()
20+
reader_helper_t* reader_helper =
21+
static_cast<reader_helper_t*>(gifFile->UserData);
22+
23+
size_t num_bytes_to_read = std::min(
24+
(size_t)len,
25+
reader_helper->encoded_data_size - reader_helper->num_bytes_read);
26+
std::memcpy(
27+
buf, reader_helper->encoded_data + reader_helper->num_bytes_read, len);
28+
reader_helper->num_bytes_read += num_bytes_to_read;
29+
return num_bytes_to_read;
30+
}
31+
32+
// Decodes a (possibly animated) GIF from its raw encoded bytes using GIFLIB.
//
// Args:
//   encoded_data: 1-D, contiguous uint8 tensor holding the bytes of the GIF.
// Returns:
//   A uint8 tensor of shape (3, H, W) when the GIF contains a single image, or
//   (N, 3, H, W) when it contains N > 1 images. H and W are the dimensions of
//   the first image.
// Raises (through TORCH_CHECK):
//   if the input is not a 1-D contiguous uint8 tensor, if GIFLIB fails to open
//   or parse the data, if the images do not all share the same dimensions, if
//   no color map is available, or if a pixel refers to a color outside of its
//   color map.
torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
  // LibGif docs: https://giflib.sourceforge.net/intro.html
  // Refer over there for more details on the libgif API, API ref, and a
  // detailed description of the GIF format.

  TORCH_CHECK(encoded_data.is_contiguous(), "Input tensor must be contiguous.");
  TORCH_CHECK(
      encoded_data.dtype() == torch::kU8,
      "Input tensor must have uint8 data type, got ",
      encoded_data.dtype());
  TORCH_CHECK(
      encoded_data.dim() == 1,
      "Input tensor must be 1-dimensional, got ",
      encoded_data.dim(),
      " dims.");

  int error = D_GIF_SUCCEEDED;

  // We're using DGifOpen. The other entrypoints of libgif are
  // DGifOpenFileName and DGifOpenFileHandle but we don't want to use those,
  // since we need to read the encoded bytes from a tensor of encoded bytes, not
  // from a file (for consistency with existing jpeg and png decoders). Using
  // DGifOpen is the only way to read from a custom source.
  // For that we need to provide a reader function `read_from_tensor` that
  // reads from the tensor, and we have to keep track of the number of bytes
  // read so far: this is why we need the reader_helper struct.

  // TODO: We are potentially doing an unnecessary copy of the encoded bytes:
  // - 1 copy from file to tensor (in read_file())
  // - 1 copy from tensor to GIFLIB buffers (in read_from_tensor())
  // Since we're vendoring GIFLIB we can potentially modify the calls to
  // InternalRead() and just set the `buf` pointer to the tensor data directly.
  // That might even save allocation of those buffers.
  // If we do that, we'd have to make sure the buffers are never written to by
  // GIFLIB, otherwise we'd be overwriting the tensor data.
  reader_helper_t reader_helper;
  reader_helper.encoded_data = encoded_data.data_ptr<uint8_t>();
  reader_helper.encoded_data_size = encoded_data.numel();
  reader_helper.num_bytes_read = 0;
  GifFileType* gifFile =
      DGifOpen(static_cast<void*>(&reader_helper), read_from_tensor, &error);

  // Closes the GIF handle on every exit path, including the exceptions thrown
  // by the TORCH_CHECKs below, so that the handle is never leaked. The
  // success path closes the handle explicitly (to report close errors) and
  // disarms the guard first.
  struct GifFileGuard {
    GifFileType* file;
    ~GifFileGuard() {
      if (file != nullptr) {
        int ignored_error = D_GIF_SUCCEEDED;
        DGifCloseFile(file, &ignored_error);
      }
    }
  } guard{gifFile};

  // NOTE: the message names DGifOpenFileName() although DGifOpen() is the
  // function being called; it is kept verbatim because existing tests match on
  // this exact text.
  TORCH_CHECK(
      (gifFile != nullptr) && (error == D_GIF_SUCCEEDED),
      "DGifOpenFileName() failed - ",
      error);

  if (DGifSlurp(gifFile) == GIF_ERROR) {
    // Read the error code before the guard closes (and frees) the handle.
    auto gifFileError = gifFile->Error;
    TORCH_CHECK(false, "DGifSlurp() failed - ", gifFileError);
  }
  auto num_images = gifFile->ImageCount;

  // This check should already be done within DGifSlurp(), just to be safe
  TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");

  // Note:
  // The GIF format has this notion of "canvas" and "canvas size", where each
  // image could be displayed on the canvas at different offsets, forming a
  // mosaic/picture wall like so:
  //
  // <--- canvas W --->
  // ------------------------ ^
  // |         |            | |
  // |  img1   |    img3    | |
  // |         |------------| canvas H
  // |----------            | |
  // |  img2   |    img4    | |
  // |         |            | |
  // ------------------------ v
  // The GifLib docs indicate that this is mostly vestigial
  // (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
  // modern viewers ignore the canvas size as well as image offsets. Hence,
  // we're ignoring that too:
  // - We're ignoring the canvas width and height and assume that the shape of
  //   the canvas and of all images is the shape of the first image.
  // - We're enforcing that all images have the same shape.
  // - Left and Top offsets of each image are ignored as well and assumed to be
  //   0.

  auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
  auto out_w = gifFile->SavedImages[0].ImageDesc.Width;

  // We output a channels-last tensor for consistency with other image decoders.
  // Torchvision's resize tends to be faster on uint8 channels-last tensors.
  auto options = torch::TensorOptions()
                     .dtype(torch::kU8)
                     .memory_format(torch::MemoryFormat::ChannelsLast);
  auto out = torch::empty(
      {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
  auto out_a = out.accessor<uint8_t, 4>();

  for (int i = 0; i < num_images; i++) {
    const SavedImage& img = gifFile->SavedImages[i];
    const GifImageDesc& desc = img.ImageDesc;
    TORCH_CHECK(
        desc.Width == out_w && desc.Height == out_h,
        "All images in the gif should have the same dimensions.");

    // A local (per-image) color map takes precedence over the global one.
    const ColorMapObject* cmap =
        desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
    TORCH_CHECK(
        cmap != nullptr,
        "Global and local color maps are missing. This should never happen!");

    for (int h = 0; h < desc.Height; h++) {
      // 64-bit offset: h * Width can exceed the range of int on large images.
      const int64_t row_offset = int64_t(h) * int64_t(desc.Width);
      for (int w = 0; w < desc.Width; w++) {
        auto c = img.RasterBits[row_offset + w];
        // A malformed file can carry color indices beyond the end of the
        // color map; reject them instead of reading out of bounds.
        TORCH_CHECK(
            int(c) < cmap->ColorCount,
            "Pixel color index ",
            int(c),
            " is out of range for a color map with ",
            cmap->ColorCount,
            " entries.");
        const GifColorType& rgb = cmap->Colors[c];
        out_a[i][0][h][w] = rgb.Red;
        out_a[i][1][h][w] = rgb.Green;
        out_a[i][2][h][w] = rgb.Blue;
      }
    }
  }
  out = out.squeeze(0); // remove batch dim if there's only one image

  // Disarm the guard before closing explicitly so the handle is closed once.
  guard.file = nullptr;
  DGifCloseFile(gifFile, &error);
  TORCH_CHECK(error == D_GIF_SUCCEEDED, "DGifCloseFile() failed - ", error);

  return out;
}
155+
156+
} // namespace image
157+
} // namespace vision
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#pragma once
2+
3+
#include <torch/types.h>
4+
5+
namespace vision {
6+
namespace image {
7+
8+
// encoded_data tensor must be 1D uint8 and contiguous
9+
C10_EXPORT torch::Tensor decode_gif(const torch::Tensor& encoded_data);
10+
11+
} // namespace image
12+
} // namespace vision

torchvision/csrc/io/image/cpu/decode_image.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "decode_image.h"
22

3+
#include "decode_gif.h"
34
#include "decode_jpeg.h"
45
#include "decode_png.h"
56

@@ -23,16 +24,24 @@ torch::Tensor decode_image(
2324

2425
const uint8_t jpeg_signature[3] = {255, 216, 255}; // == "\xFF\xD8\xFF"
2526
const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG"
27+
const uint8_t gif_signature_1[6] = {
28+
0x47, 0x49, 0x46, 0x38, 0x39, 0x61}; // == "GIF89a"
29+
const uint8_t gif_signature_2[6] = {
30+
0x47, 0x49, 0x46, 0x38, 0x37, 0x61}; // == "GIF87a"
2631

2732
if (memcmp(jpeg_signature, datap, 3) == 0) {
2833
return decode_jpeg(data, mode, apply_exif_orientation);
2934
} else if (memcmp(png_signature, datap, 4) == 0) {
3035
return decode_png(
3136
data, mode, /*allow_16_bits=*/false, apply_exif_orientation);
37+
} else if (
38+
memcmp(gif_signature_1, datap, 6) == 0 ||
39+
memcmp(gif_signature_2, datap, 6) == 0) {
40+
return decode_gif(data);
3241
} else {
3342
TORCH_CHECK(
3443
false,
35-
"Unsupported image file. Only jpeg and png ",
44+
"Unsupported image file. Only jpeg, png and gif ",
3645
"are currently supported.");
3746
}
3847
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
These files come from the GIFLIB project (https://giflib.sourceforge.net/) and
2+
are licensed under the MIT license.
3+
4+
Some modifications have been made to the original files:
5+
- Remove use of "register" keyword in gifalloc.c for C++17 compatibility.
6+
- Declare loop variable i in DGifGetImageHeader as int instead of unsigned int.
7+
8+
Below is the original license text from the COPYING file of the GIFLIB project:
9+
10+
= MIT LICENSE
11+
12+
Permission is hereby granted, free of charge, to any person obtaining a copy
13+
of this software and associated documentation files (the "Software"), to deal
14+
in the Software without restriction, including without limitation the rights
15+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16+
copies of the Software, and to permit persons to whom the Software is
17+
furnished to do so, subject to the following conditions:
18+
19+
The above copyright notice and this permission notice shall be included in
20+
all copies or substantial portions of the Software.
21+
22+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28+
THE SOFTWARE.

0 commit comments

Comments
 (0)