Merge branch 'main' into main

NicolasHug · web-flow · commit 218fc58afd86 · 2024-05-28T13:37:53.000+01:00
diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
@@ -22,17 +22,6 @@ case $(uname) in
     ;;
 esac
 
-if [[ "${OS_TYPE}" == "macos" && $(uname -m) == x86_64 ]]; then
-  echo '::group::Uninstall system JPEG libraries on macOS'
-  # The x86 macOS runners, e.g. the GitHub Actions native "macos-12" runner, has some JPEG and PNG libraries
-  # installed by default that interfere with our build. We uninstall them here and use the one from conda below.
-  IMAGE_LIBS=$(brew list | grep -E "jpeg|png")
-  for lib in $IMAGE_LIBS; do
-    brew uninstall --ignore-dependencies --force "${lib}"
-  done
-  echo '::endgroup::'
-fi
-
 echo '::group::Create build environment'
 # See https://github.com/pytorch/vision/issues/7296 for ffmpeg
 conda create \
diff --git a/.github/workflows/build-cmake.yml b/.github/workflows/build-cmake.yml
@@ -40,7 +40,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - runner: macos-12
           - runner: macos-m1-stable
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -53,16 +53,11 @@ jobs:
           - "3.10"
           - "3.11"
           - "3.12"
-        runner: ["macos-12"]
-        include:
-          - python-version: "3.8"
-            runner: macos-m1-stable
+        runner: ["macos-m1-stable"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       repository: pytorch/vision
-      # We need an increased timeout here, since the macos-12 runner is the free one from GH
-      # and needs roughly 2 hours to just run the test suite
       timeout: 240
       runner: ${{ matrix.runner }}
       test-infra-ref: main
diff --git a/setup.py b/setup.py
@@ -554,6 +554,7 @@ def run(self):
         zip_safe=False,
         install_requires=requirements,
         extras_require={
+            "gdown": ["gdown>=4.7.3"],
             "scipy": ["scipy"],
         },
         ext_modules=get_extensions(),
diff --git a/test/test_image.py b/test/test_image.py
@@ -551,7 +551,9 @@ def test_pathlib_support(tmpdir):
     write_png(img, write_path)
 
 
-@pytest.mark.parametrize("name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans"))
+@pytest.mark.parametrize(
+    "name", ("gifgrid", "fire", "porsche", "treescap", "treescap-interlaced", "solid2", "x-trans", "earth")
+)
 @pytest.mark.parametrize("scripted", (True, False))
 def test_decode_gif(tmpdir, name, scripted):
     # Using test images from GIFLIB
@@ -560,9 +562,16 @@ def test_decode_gif(tmpdir, name, scripted):
     # We're not testing against "welcome2" because PIL and GIFLIB disagee on what
     # the background color should be (likely a difference in the way they handle
     # transparency?)
+    # 'earth' image is from wikipedia, licensed under CC BY-SA 3.0
+    # https://creativecommons.org/licenses/by-sa/3.0/
+    # It lets us properly test transparency, TOP-LEFT offsets, and
+    # disposal modes.
 
     path = tmpdir / f"{name}.gif"
-    url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
+    if name == "earth":
+        url = "https://upload.wikimedia.org/wikipedia/commons/2/2c/Rotating_earth_%28large%29.gif"
+    else:
+        url = f"https://sourceforge.net/p/giflib/code/ci/master/tree/pic/{name}.gif?format=raw"
     with open(path, "wb") as f:
         f.write(requests.get(url).content)
 
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -355,6 +355,13 @@ def test_draw_keypoints_vanilla():
     assert_equal(img, img_cp)
 
 
+def test_draw_keypoins_K_equals_one():
+    # Non-regression test for https://github.com/pytorch/vision/pull/8439
+    img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
+    keypoints = torch.tensor([[[10, 10]]], dtype=torch.float)
+    utils.draw_keypoints(img, keypoints)
+
+
 @pytest.mark.parametrize("colors", ["red", "#FF00FF", (1, 34, 122)])
 def test_draw_keypoints_colored(colors):
     # Keypoints is declared on top as global variable
diff --git a/torchvision/csrc/io/image/cpu/decode_gif.cpp b/torchvision/csrc/io/image/cpu/decode_gif.cpp
@@ -86,32 +86,19 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
   // This check should already done within DGifSlurp(), just to be safe
   TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!");
 
-  // Note:
-  // The GIF format has this notion of "canvas" and "canvas size", where each
-  // image could be displayed on the canvas at different offsets, forming a
-  // mosaic/picture wall like so:
-  //
-  // <---    canvas W    --->
-  // ------------------------     ^
-  // |         |            |     |
-  // |   img1  |    img3    |     |
-  // |         |------------|  canvas H
-  // |----------            |     |
-  // |   img2  |    img4    |     |
-  // |         |            |     |
-  // ------------------------     v
-  // The GifLib docs indicate that this is mostly vestigial
-  // (https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html), and
-  // modern viewers ignore the canvas size as well as image offsets. Hence,
-  // we're ignoring that too:
-  // - We're ignoring the canvas width and height and assume that the shape of
-  // the canvas and of all images is the shape of the first image.
-  // - We're enforcing that all images have the same shape.
-  // - Left and Top offsets of each image are ignored as well and assumed to be
-  // 0.
-
-  auto out_h = gifFile->SavedImages[0].ImageDesc.Height;
-  auto out_w = gifFile->SavedImages[0].ImageDesc.Width;
+  GifColorType bg = {0, 0, 0};
+  if (gifFile->SColorMap) {
+    bg = gifFile->SColorMap->Colors[gifFile->SBackGroundColor];
+  }
+
+  // The GIFLIB docs say that the canvas's height and width are potentially
+  // ignored by modern viewers, so to be on the safe side we set the output
+  // height to max(canvas_height, first_image_height). Same for width.
+  // https://giflib.sourceforge.net/whatsinagif/bits_and_bytes.html
+  auto out_h =
+      std::max(gifFile->SHeight, gifFile->SavedImages[0].ImageDesc.Height);
+  auto out_w =
+      std::max(gifFile->SWidth, gifFile->SavedImages[0].ImageDesc.Width);
 
   // We output a channels-last tensor for consistency with other image decoders.
   // Torchvision's resize tends to be is faster on uint8 channels-last tensors.
@@ -121,30 +108,59 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) {
   auto out = torch::empty(
       {int64_t(num_images), 3, int64_t(out_h), int64_t(out_w)}, options);
   auto out_a = out.accessor<uint8_t, 4>();
-
   for (int i = 0; i < num_images; i++) {
     const SavedImage& img = gifFile->SavedImages[i];
-    const GifImageDesc& desc = img.ImageDesc;
-    TORCH_CHECK(
-        desc.Width == out_w && desc.Height == out_h,
-        "All images in the gif should have the same dimensions.");
 
+    GraphicsControlBlock gcb;
+    DGifSavedExtensionToGCB(gifFile, i, &gcb);
+
+    const GifImageDesc& desc = img.ImageDesc;
     const ColorMapObject* cmap =
         desc.ColorMap ? desc.ColorMap : gifFile->SColorMap;
     TORCH_CHECK(
         cmap != nullptr,
         "Global and local color maps are missing. This should never happen!");
 
+    // When going from one image to another, there is a "disposal method" which
+    // specifies how to handle the transition. E.g. DISPOSE_DO_NOT means that
+    // the current image should essentially be drawn on top of the previous
+    // canvas. The pixels of that previous canvas will appear on the new one if
+    // either:
+    // - a pixel is transparent in the current image
+    // - the current image is smaller than the canvas, hence exposing its pixels
+    // The "background" disposal method means that the current canvas should be
+    // set to the background color.
+    // We only support these 2 modes and default to "background" when the
+    // disposal method is unspecified, or when it's set to "DISPOSE_PREVIOUS"
+    // which according to GIFLIB is not widely supported.
+    // (https://giflib.sourceforge.net/whatsinagif/animation_and_transparency.html).
+    if (i > 0 && gcb.DisposalMode == DISPOSE_DO_NOT) {
+      out[i] = out[i - 1];
+    } else {
+      // Background. If bg wasn't defined, it will be (0, 0, 0)
+      for (int h = 0; h < gifFile->SHeight; h++) {
+        for (int w = 0; w < gifFile->SWidth; w++) {
+          out_a[i][0][h][w] = bg.Red;
+          out_a[i][1][h][w] = bg.Green;
+          out_a[i][2][h][w] = bg.Blue;
+        }
+      }
+    }
+
     for (int h = 0; h < desc.Height; h++) {
       for (int w = 0; w < desc.Width; w++) {
         auto c = img.RasterBits[h * desc.Width + w];
+        if (c == gcb.TransparentColor) {
+          continue;
+        }
         GifColorType rgb = cmap->Colors[c];
-        out_a[i][0][h][w] = rgb.Red;
-        out_a[i][1][h][w] = rgb.Green;
-        out_a[i][2][h][w] = rgb.Blue;
+        out_a[i][0][h + desc.Top][w + desc.Left] = rgb.Red;
+        out_a[i][1][h + desc.Top][w + desc.Left] = rgb.Green;
+        out_a[i][2][h + desc.Top][w + desc.Left] = rgb.Blue;
       }
     }
   }
+
   out = out.squeeze(0); // remove batch dim if there's only one image
 
   DGifCloseFile(gifFile, &error);
diff --git a/torchvision/transforms/v2/functional/_color.py b/torchvision/transforms/v2/functional/_color.py
@@ -687,7 +687,7 @@ def permute_channels(inpt: torch.Tensor, permutation: List[int]) -> torch.Tensor
 
     Example:
         >>> rgb_image = torch.rand(3, 256, 256)
-        >>> bgr_image = F.permutate_channels(rgb_image, permutation=[2, 1, 0])
+        >>> bgr_image = F.permute_channels(rgb_image, permutation=[2, 1, 0])
 
     Args:
         permutation (List[int]): Valid permutation of the input channel indices. The index of the element determines the
diff --git a/torchvision/utils.py b/torchvision/utils.py
@@ -392,10 +392,10 @@ def draw_keypoints(
     # validate visibility
     if visibility is None:  # set default
         visibility = torch.ones(keypoints.shape[:-1], dtype=torch.bool)
-    # If the last dimension is 1, e.g., after calling split([2, 1], dim=-1) on the output of a keypoint-prediction
-    # model, make sure visibility has shape (num_instances, K).
-    # Iff K = 1, this has unwanted behavior, but K=1 does not really make sense in the first place.
-    visibility = visibility.squeeze(-1)
+    if visibility.ndim == 3:
+        # If visibility was passed as pred.split([2, 1], dim=-1), it will be of shape (num_instances, K, 1).
+        # We make sure it is of shape (num_instances, K). This isn't documented, we're just being nice.
+        visibility = visibility.squeeze(-1)
     if visibility.ndim != 2:
         raise ValueError(f"visibility must be of shape (num_instances, K). Got ndim={visibility.ndim}")
     if visibility.shape != keypoints.shape[:-1]: