Skip to content

Commit 218fc58

Browse files
authored
Merge branch 'main' into main
2 parents f1aa311 + 778ce48 commit 218fc58

File tree

9 files changed

+75
-59
lines changed

9 files changed

+75
-59
lines changed

.github/scripts/setup-env.sh

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,6 @@ case $(uname) in
2222
;;
2323
esac
2424

25-
if [[ "${OS_TYPE}" == "macos" && $(uname -m) == x86_64 ]]; then
26-
echo '::group::Uninstall system JPEG libraries on macOS'
27-
# The x86 macOS runners, e.g. the GitHub Actions native "macos-12" runner, have some JPEG and PNG libraries
28-
# installed by default that interfere with our build. We uninstall them here and use the one from conda below.
29-
IMAGE_LIBS=$(brew list | grep -E "jpeg|png")
30-
for lib in $IMAGE_LIBS; do
31-
brew uninstall --ignore-dependencies --force "${lib}"
32-
done
33-
echo '::endgroup::'
34-
fi
35-
3625
echo '::group::Create build environment'
3726
# See https://github.com/pytorch/vision/issues/7296 for ffmpeg
3827
conda create \

.github/workflows/build-cmake.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ jobs:
4040
strategy:
4141
matrix:
4242
include:
43-
- runner: macos-12
4443
- runner: macos-m1-stable
4544
fail-fast: false
4645
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

.github/workflows/tests.yml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,11 @@ jobs:
5353
- "3.10"
5454
- "3.11"
5555
- "3.12"
56-
runner: ["macos-12"]
57-
include:
58-
- python-version: "3.8"
59-
runner: macos-m1-stable
56+
runner: ["macos-m1-stable"]
6057
fail-fast: false
6158
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
6259
with:
6360
repository: pytorch/vision
64-
# We need an increased timeout here, since the macos-12 runner is the free one from GH
65-
# and needs roughly 2 hours to just run the test suite
6661
timeout: 240
6762
runner: ${{ matrix.runner }}
6863
test-infra-ref: main

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ def run(self):
554554
zip_safe=False,
555555
install_requires=requirements,
556556
extras_require={
557+
"gdown": ["gdown>=4.7.3"],
557558
"scipy": ["scipy"],
558559
},
559560
ext_modules=get_extensions(),

test/test_image.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
551551
write_png(img, write_path)
552552

553553

554-
@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
554+
@pytest.mark.parametrize(
555+
"name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
556+
)
555557
@pytest.mark.parametrize("scripted", (True, False))
556558
def test_decode_gif(tmpdir, name, scripted):
557559
# Using test images from GIFLIB
@@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
560562
# We're not testing against "welcome2" because PIL and GIFLIB disagree on what
561563
# the background color should be (likely a difference in the way they handle
562564
# transparency?)
565+
# 'earth' image is from wikipedia, licensed under CC BY-SA 3.0
566+
# https://creativecommons.org/licenses/by-sa/3.0/
567+
# It allows us to properly test for transparency, TOP-LEFT offsets, and
568+
# disposal modes.
563569

564570
path = tmpdir / f"{name}.gif"
565-
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
571+
if name == "earth":
572+
url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
573+
else:
574+
url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
566575
with open(path, "wb") as f:
567576
f.write(requests.get(url).content)
568577

test/test_utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,13 @@ def test_draw_keypoints_vanilla():
355355
assert_equal(img, img_cp)
356356

357357

358+
def test_draw_keypoins_K_equals_one():
359+
# Non-regression test for https://github.com/pytorch/vision/pull/8439
360+
img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
361+
keypoints = torch.tensor([[[10, 10]]], dtype=torch.float)
362+
utils.draw_keypoints(img, keypoints)
363+
364+
358365
@pytest.mark.parametrize("colors", ["red", "#FF00FF", (1, 34, 122)])
359366
def test_draw_keypoints_colored(colors):
360367
# Keypoints is declared on top as global variable

torchvision/csrc/io/image/cpu/decode_gif.cpp

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
8686
// This check should already be done within DGifSlurp(), just to be safe
8787
TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");
8888

89-
// Note:
90-
// The GIF format has this notion of "canvas" and "canvas size", where each
91-
// image could be displayed on the canvas at different offsets, forming a
92-
// mosaic/picture wall like so:
93-
//
94-
// <--- canvas W --->
95-
// ------------------------ ^
96-
// | | | |
97-
// | img1 | img3 | |
98-
// | |------------| canvas H
99-
// |---------- | |
100-
// | img2 | img4 | |
101-
// | | | |
102-
// ------------------------ v
103-
// The GifLib docs indicate that this is mostly vestigial
104-
// (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
105-
// modern viewers ignore the canvas size as well as image offsets. Hence,
106-
// we're ignoring that too:
107-
// - We're ignoring the canvas width and height and assume that the shape of
108-
// the canvas and of all images is the shape of the first image.
109-
// - We're enforcing that all images have the same shape.
110-
// - Left and Top offsets of each image are ignored as well and assumed to be
111-
// 0.
112-
113-
auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
114-
auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
89+
GifColorType bg = {0, 0, 0};
90+
if (gifFile->SColorMap) {
91+
bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
92+
}
93+
94+
// The GIFLIB docs say that the canvas's height and width are potentially
95+
// ignored by modern viewers, so to be on the safe side we set the output
96+
// height to max(canvas_height, first_image_height). Same for width.
97+
// https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
98+
auto out_h =
99+
std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
100+
auto out_w =
101+
std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);
115102

116103
// We output a channels-last tensor for consistency with other image decoders.
117104
// Torchvision's resize tends to be faster on uint8 channels-last tensors.
@@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
121108
auto out = torch::empty(
122109
{int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
123110
auto out_a = out.accessor<uint8_t, 4>();
124-
125111
for (int i = 0; i < num_images; i++) {
126112
const SavedImage& img = gifFile->SavedImages[i];
127-
const GifImageDesc& desc = img.ImageDesc;
128-
TORCH_CHECK(
129-
desc.Width == out_w && desc.Height == out_h,
130-
"All images in the gif should have the same dimensions.");
131113

114+
GraphicsControlBlock gcb;
115+
DGifSavedExtensionToGCB(gifFile, i, &gcb);
116+
117+
const GifImageDesc& desc = img.ImageDesc;
132118
const ColorMapObject* cmap =
133119
desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
134120
TORCH_CHECK(
135121
cmap != nullptr,
136122
"Global and local color maps are missing. This should never happen!");
137123

124+
// When going from one image to another, there is a "disposal method" which
125+
// specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
126+
// the current image should essentially be drawn on top of the previous
127+
// canvas. The pixels of that previous canvas will appear on the new one if
128+
// either:
129+
// - a pixel is transparent in the current image
130+
// - the current image is smaller than the canvas, hence exposing its pixels
131+
// The "background" disposal method means that the current canvas should be
132+
// set to the background color.
133+
// We only support these 2 modes and default to "background" when the
134+
// disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
135+
// which according to GIFLIB is not widely supported.
136+
// (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
137+
if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
138+
out[i] = out[i - 1];
139+
} else {
140+
// Background. If bg wasn't defined, it will be (0, 0, 0)
141+
for (int h = 0; h < gifFile->SHeight; h++) {
142+
for (int w = 0; w < gifFile->SWidth; w++) {
143+
out_a[i][0][h][w] = bg.Red;
144+
out_a[i][1][h][w] = bg.Green;
145+
out_a[i][2][h][w] = bg.Blue;
146+
}
147+
}
148+
}
149+
138150
for (int h = 0; h < desc.Height; h++) {
139151
for (int w = 0; w < desc.Width; w++) {
140152
auto c = img.RasterBits[h * desc.Width + w];
153+
if (c == gcb.TransparentColor) {
154+
continue;
155+
}
141156
GifColorType rgb = cmap->Colors[c];
142-
out_a[i][0][h][w] = rgb.Red;
143-
out_a[i][1][h][w] = rgb.Green;
144-
out_a[i][2][h][w] = rgb.Blue;
157+
out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
158+
out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
159+
out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
145160
}
146161
}
147162
}
163+
148164
out = out.squeeze(0); // remove batch dim if there's only one image
149165

150166
DGifCloseFile(gifFile, &error);

torchvision/transforms/v2/functional/_color.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ def permute_channels(inpt: torch.Tensor, permutation: List[int]) -> torch.Tensor
687687
688688
Example:
689689
>>> rgb_image = torch.rand(3, 256, 256)
690-
>>> bgr_image = F.permutate_channels(rgb_image, permutation=[2, 1, 0])
690+
>>> bgr_image = F.permute_channels(rgb_image, permutation=[2, 1, 0])
691691
692692
Args:
693693
permutation (List[int]): Valid permutation of the input channel indices. The index of the element determines the

torchvision/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,10 +392,10 @@ def draw_keypoints(
392392
# validate visibility
393393
if visibility is None: # set default
394394
visibility = torch.ones(keypoints.shape[:-1], dtype=torch.bool)
395-
# If the last dimension is 1, e.g., after calling split([2, 1], dim=-1) on the output of a keypoint-prediction
396-
# model, make sure visibility has shape (num_instances, K).
397-
# Iff K = 1, this has unwanted behavior, but K=1 does not really make sense in the first place.
398-
visibility = visibility.squeeze(-1)
395+
if visibility.ndim == 3:
396+
# If visibility was passed as pred.split([2, 1], dim=-1), it will be of shape (num_instances, K, 1).
397+
# We make sure it is of shape (num_instances, K). This isn't documented, we're just being nice.
398+
visibility = visibility.squeeze(-1)
399399
if visibility.ndim != 2:
400400
raise ValueError(f"visibility must be of shape (num_instances, K). Got ndim={visibility.ndim}")
401401
if visibility.shape != keypoints.shape[:-1]:

0 commit comments

Comments
 (0)