|
| 1 | +#include "decode_gif.h" |
| 2 | +#include <cstring> |
| 3 | +#include "giflib/gif_lib.h" |
| 4 | + |
| 5 | +namespace vision { |
| 6 | +namespace image { |
| 7 | + |
// Bookkeeping for reading the encoded GIF bytes out of the input tensor.
// A pointer to an instance is stored in the GifFileType's UserData field so
// that the read callback knows where it left off.
struct reader_helper_t {
  // Start of the input tensor's data.
  const uint8_t* encoded_data;
  // Total size of the input tensor, in bytes.
  size_t encoded_data_size;
  // How many bytes of the tensor have been consumed so far.
  size_t num_bytes_read;
};
| 13 | + |
| 14 | +// That function is used by GIFLIB routines to read the encoded bytes. |
| 15 | +// This reads `len` bytes and writes them into `buf`. The data is read from the |
| 16 | +// input tensor passed to decode_gif() starting at the `num_bytes_read` |
| 17 | +// position. |
| 18 | +int read_from_tensor(GifFileType* gifFile, GifByteType* buf, int len) { |
| 19 | + // the UserData field was set in DGifOpen() |
| 20 | + reader_helper_t* reader_helper = |
| 21 | + static_cast<reader_helper_t*>(gifFile->UserData); |
| 22 | + |
| 23 | + size_t num_bytes_to_read = std::min( |
| 24 | + (size_t)len, |
| 25 | + reader_helper->encoded_data_size - reader_helper->num_bytes_read); |
| 26 | + std::memcpy( |
| 27 | + buf, reader_helper->encoded_data + reader_helper->num_bytes_read, len); |
| 28 | + reader_helper->num_bytes_read += num_bytes_to_read; |
| 29 | + return num_bytes_to_read; |
| 30 | +} |
| 31 | + |
| 32 | +torch::Tensor decode_gif(const torch::Tensor& encoded_data) { |
| 33 | + // LibGif docs: https://giflib.sourceforge.net/intro.html |
| 34 | + // Refer over there for more details on the libgif API, API ref, and a |
| 35 | + // detailed description of the GIF format. |
| 36 | + |
| 37 | + TORCH_CHECK(encoded_data.is_contiguous(), "Input tensor must be contiguous."); |
| 38 | + TORCH_CHECK( |
| 39 | + encoded_data.dtype() == torch::kU8, |
| 40 | + "Input tensor must have uint8 data type, got ", |
| 41 | + encoded_data.dtype()); |
| 42 | + TORCH_CHECK( |
| 43 | + encoded_data.dim() == 1, |
| 44 | + "Input tensor must be 1-dimensional, got ", |
| 45 | + encoded_data.dim(), |
| 46 | + " dims."); |
| 47 | + |
| 48 | + int error = D_GIF_SUCCEEDED; |
| 49 | + |
| 50 | + // We're using DGidOpen. The other entrypoints of libgif are |
| 51 | + // DGifOpenFileName and DGifOpenFileHandle but we don't want to use those, |
| 52 | + // since we need to read the encoded bytes from a tensor of encoded bytes, not |
| 53 | + // from a file (for consistency with existing jpeg and png decoders). Using |
| 54 | + // DGifOpen is the only way to read from a custom source. |
| 55 | + // For that we need to provide a reader function `read_from_tensor` that |
| 56 | + // reads from the tensor, and we have to keep track of the number of bytes |
| 57 | + // read so far: this is why we need the reader_helper struct. |
| 58 | + |
| 59 | + // TODO: We are potentially doing an unnecessary copy of the encoded bytes: |
| 60 | + // - 1 copy in from file to tensor (in read_file()) |
| 61 | + // - 1 copy from tensor to GIFLIB buffers (in read_from_tensor()) |
| 62 | + // Since we're vendoring GIFLIB we can potentially modify the calls to |
| 63 | + // InternalRead() and just set the `buf` pointer to the tensor data directly. |
| 64 | + // That might even save allocation of those buffers. |
| 65 | + // If we do that, we'd have to make sure the buffers are never written to by |
| 66 | + // GIFLIB, otherwise we'd be overridding the tensor data. |
| 67 | + reader_helper_t reader_helper; |
| 68 | + reader_helper.encoded_data = encoded_data.data_ptr<uint8_t>(); |
| 69 | + reader_helper.encoded_data_size = encoded_data.numel(); |
| 70 | + reader_helper.num_bytes_read = 0; |
| 71 | + GifFileType* gifFile = |
| 72 | + DGifOpen(static_cast<void*>(&reader_helper), read_from_tensor, &error); |
| 73 | + |
| 74 | + TORCH_CHECK( |
| 75 | + (gifFile != nullptr) && (error == D_GIF_SUCCEEDED), |
| 76 | + "DGifOpenFileName() failed - ", |
| 77 | + error); |
| 78 | + |
| 79 | + if (DGifSlurp(gifFile) == GIF_ERROR) { |
| 80 | + auto gifFileError = gifFile->Error; |
| 81 | + DGifCloseFile(gifFile, &error); |
| 82 | + TORCH_CHECK(false, "DGifSlurp() failed - ", gifFileError); |
| 83 | + } |
| 84 | + auto num_images = gifFile->ImageCount; |
| 85 | + |
| 86 | + // This check should already done within DGifSlurp(), just to be safe |
| 87 | + TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!"); |
| 88 | + |
| 89 | + // Note: |
| 90 | + // The GIF format has this notion of "canvas" and "canvas size", where each |
| 91 | + // image could be displayed on the canvas at different offsets, forming a |
| 92 | + // mosaic/picture wall like so: |
| 93 | + // |
| 94 | + // <--- canvas W ---> |
| 95 | + // ------------------------ ^ |
| 96 | + // | | | | |
| 97 | + // | img1 | img3 | | |
| 98 | + // | |------------| canvas H |
| 99 | + // |---------- | | |
| 100 | + // | img2 | img4 | | |
| 101 | + // | | | | |
| 102 | + // ------------------------ v |
| 103 | + // The GifLib docs indicate that this is mostly vestigial |
| 104 | + // (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and |
| 105 | + // modern viewers ignore the canvas size as well as image offsets. Hence, |
| 106 | + // we're ignoring that too: |
| 107 | + // - We're ignoring the canvas width and height and assume that the shape of |
| 108 | + // the canvas and of all images is the shape of the first image. |
| 109 | + // - We're enforcing that all images have the same shape. |
| 110 | + // - Left and Top offsets of each image are ignored as well and assumed to be |
| 111 | + // 0. |
| 112 | + |
| 113 | + auto out_h = gifFile->SavedImages[0].ImageDesc.Height; |
| 114 | + auto out_w = gifFile->SavedImages[0].ImageDesc.Width; |
| 115 | + |
| 116 | + // We output a channels-last tensor for consistency with other image decoders. |
| 117 | + // Torchvision's resize tends to be is faster on uint8 channels-last tensors. |
| 118 | + auto options = torch::TensorOptions() |
| 119 | + .dtype(torch::kU8) |
| 120 | + .memory_format(torch::MemoryFormat::ChannelsLast); |
| 121 | + auto out = torch::empty( |
| 122 | + {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options); |
| 123 | + auto out_a = out.accessor<uint8_t, 4>(); |
| 124 | + |
| 125 | + for (int i = 0; i < num_images; i++) { |
| 126 | + const SavedImage& img = gifFile->SavedImages[i]; |
| 127 | + const GifImageDesc& desc = img.ImageDesc; |
| 128 | + TORCH_CHECK( |
| 129 | + desc.Width == out_w && desc.Height == out_h, |
| 130 | + "All images in the gif should have the same dimensions."); |
| 131 | + |
| 132 | + const ColorMapObject* cmap = |
| 133 | + desc.ColorMap ? desc.ColorMap : gifFile->SColorMap; |
| 134 | + TORCH_CHECK( |
| 135 | + cmap != nullptr, |
| 136 | + "Global and local color maps are missing. This should never happen!"); |
| 137 | + |
| 138 | + for (int h = 0; h < desc.Height; h++) { |
| 139 | + for (int w = 0; w < desc.Width; w++) { |
| 140 | + auto c = img.RasterBits[h * desc.Width + w]; |
| 141 | + GifColorType rgb = cmap->Colors[c]; |
| 142 | + out_a[i][0][h][w] = rgb.Red; |
| 143 | + out_a[i][1][h][w] = rgb.Green; |
| 144 | + out_a[i][2][h][w] = rgb.Blue; |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | + out = out.squeeze(0); // remove batch dim if there's only one image |
| 149 | + |
| 150 | + DGifCloseFile(gifFile, &error); |
| 151 | + TORCH_CHECK(error == D_GIF_SUCCEEDED, "DGifCloseFile() failed - ", error); |
| 152 | + |
| 153 | + return out; |
| 154 | +} |
| 155 | + |
| 156 | +} // namespace image |
| 157 | +} // namespace vision |
0 commit comments