From 3d948dc547ab569fe986471ff10aa0cb8dfd95b1 Mon Sep 17 00:00:00 2001 From: frgfm Date: Tue, 13 Jul 2021 01:15:51 +0200 Subject: [PATCH 1/7] style: Fixed last missing typing annotation --- torchvision/datasets/video_utils.py | 67 +++++++++++++++-------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 987270c4cd4..7c0a100bb22 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -2,7 +2,7 @@ import math import warnings from fractions import Fraction -from typing import List +from typing import Any, Dict, List, Optional import torch from torchvision.io import ( @@ -10,6 +10,7 @@ _read_video_from_file, read_video, read_video_timestamps, + VideoMetaData, ) from .utils import tqdm @@ -27,7 +28,7 @@ def pts_convert(pts, timebase_from, timebase_to, round_func=math.floor): return round_func(new_pts) -def unfold(tensor, size, step, dilation=1): +def unfold(tensor: Tensor, size: int, step: int, dilation=1) -> List[int]: """ similar to tensor.unfold, but with the dilation and specialized for 1d tensors @@ -55,17 +56,17 @@ class _VideoTimestampsDataset(object): pickled when forking. """ - def __init__(self, video_paths: List[str]): + def __init__(self, video_paths: List[str]) -> None: self.video_paths = video_paths - def __len__(self): + def __len__(self) -> int: return len(self.video_paths) - def __getitem__(self, idx): + def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]: return read_video_timestamps(self.video_paths[idx]) -def _collate_fn(x): +def _collate_fn(x: Any) -> Any: """ Dummy collate function to be used with _VideoTimestampsDataset """ @@ -100,19 +101,19 @@ class VideoClips(object): def __init__( self, - video_paths, - clip_length_in_frames=16, - frames_between_clips=1, - frame_rate=None, - _precomputed_metadata=None, - num_workers=0, - _video_width=0, - _video_height=0, - _video_min_dimension=0, - _video_max_dimension=0, - _audio_samples=0, - _audio_channels=0, - ): + video_paths: List[str], + clip_length_in_frames: int = 16, + frames_between_clips: int = 1, + frame_rate: Optional[int] = None, + _precomputed_metadata: Optional[Dict[str, Any]] = None, + num_workers: int = 0, + _video_width: int = 0, + _video_height: int = 0, + _video_min_dimension: int = 0, + _video_max_dimension: int = 0, + _audio_samples: int = 0, + _audio_channels: int = 0, + ) -> None: self.video_paths = video_paths self.num_workers = num_workers @@ -131,7 +132,7 @@ def __init__( self._init_from_metadata(_precomputed_metadata) self.compute_clips(clip_length_in_frames, frames_between_clips, frame_rate) - def _compute_frame_pts(self): + def _compute_frame_pts(self) -> None: self.video_pts = [] self.video_fps = [] @@ -157,7 +158,7 @@ def _compute_frame_pts(self): self.video_pts.extend(clips) self.video_fps.extend(fps) - def _init_from_metadata(self, metadata): + def _init_from_metadata(self, metadata: Dict[str, Any]) -> None: self.video_paths = metadata["video_paths"] assert len(self.video_paths) == len(metadata["video_pts"]) self.video_pts = metadata["video_pts"] @@ -165,7 +166,7 @@ def _init_from_metadata(self, metadata): self.video_fps = metadata["video_fps"] @property - def metadata(self): + def metadata(self) -> Dict[str, Any]: _metadata = { "video_paths": self.video_paths, "video_pts": self.video_pts, @@ -173,7 +174,7 @@ def metadata(self): } return _metadata - def subset(self, indices): + def subset(self, indices: List[int]) -> Any: video_paths = 
[self.video_paths[i] for i in indices] video_pts = [self.video_pts[i] for i in indices] video_fps = [self.video_fps[i] for i in indices] @@ -198,7 +199,7 @@ def subset(self, indices): ) @staticmethod - def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate): + def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate) -> Tuple[, List[int]]: if fps is None: # if for some reason the video doesn't have fps (because doesn't have a video stream) # set the fps to 1. The value doesn't matter, because video_pts is empty anyway @@ -220,7 +221,7 @@ def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate): idxs = unfold(idxs, num_frames, step) return clips, idxs - def compute_clips(self, num_frames, step, frame_rate=None): + def compute_clips(self, num_frames: int, step, frame_rate: Optional[int] = None) -> None: """ Compute all consecutive sequences of clips from video_pts. Always returns clips of size `num_frames`, meaning that the @@ -245,19 +246,19 @@ def compute_clips(self, num_frames, step, frame_rate=None): clip_lengths = torch.as_tensor([len(v) for v in self.clips]) self.cumulative_sizes = clip_lengths.cumsum(0).tolist() - def __len__(self): + def __len__(self) -> int: return self.num_clips() - def num_videos(self): + def num_videos(self) -> int: return len(self.video_paths) - def num_clips(self): + def num_clips(self) -> int: """ Number of subclips that are available in the video list. """ return self.cumulative_sizes[-1] - def get_clip_location(self, idx): + def get_clip_location(self, idx: int) -> Tuple[int, int]: """ Converts a flattened representation of the indices into a video_idx, clip_idx representation. @@ -270,7 +271,7 @@ def get_clip_location(self, idx): return video_idx, clip_idx @staticmethod - def _resample_video_idx(num_frames, original_fps, new_fps): + def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Tensor: step = float(original_fps) / new_fps if step.is_integer(): # optimization: if step is integer, don't need to perform @@ -281,7 +282,7 @@ def _resample_video_idx(num_frames, original_fps, new_fps): idxs = idxs.floor().to(torch.int64) return idxs - def get_clip(self, idx): + def get_clip(self, idx: int) -> Tuple[Tensor, Tensor, VideoMetaData, int]: """ Gets a subclip from a list of videos. 
@@ -381,7 +382,7 @@ def get_clip(self, idx): ) return video, audio, info, video_idx - def __getstate__(self): + def __getstate__(self) -> Dict[str, Any]: video_pts_sizes = [len(v) for v in self.video_pts] # To be back-compatible, we convert data to dtype torch.long as needed # because for empty list, in legacy implementation, torch.as_tensor will @@ -409,7 +410,7 @@ def __getstate__(self): d["_version"] = 2 return d - def __setstate__(self, d): + def __setstate__(self, d: Dict[str, Any]) -> None: # for backwards-compatibility if "_version" not in d: self.__dict__ = d From ef8ef3b41d0215089c8797bd645169b1c14aca26 Mon Sep 17 00:00:00 2001 From: frgfm Date: Tue, 13 Jul 2021 11:47:40 +0200 Subject: [PATCH 2/7] style: Fixed typing --- torchvision/datasets/video_utils.py | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 7c0a100bb22..84d65df4247 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -2,7 +2,7 @@ import math import warnings from fractions import Fraction -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Callable, Union, Tuple import torch from torchvision.io import ( @@ -16,7 +16,12 @@ from .utils import tqdm -def pts_convert(pts, timebase_from, timebase_to, round_func=math.floor): +def pts_convert( + pts: int, + timebase_from: Fraction, + timebase_to: Fraction, + round_func: Callable = math.floor +) -> float: """convert pts between different time bases Args: pts: presentation timestamp, float @@ -28,7 +33,7 @@ def pts_convert(pts, timebase_from, timebase_to, round_func=math.floor): return round_func(new_pts) -def unfold(tensor: Tensor, size: int, step: int, dilation=1) -> List[int]: +def unfold(tensor: torch.Tensor, size: int, step: int, dilation: int = 1) -> torch.Tensor: """ similar to tensor.unfold, but with the dilation and specialized for 1d tensors @@ -199,7 +204,13 @@ def subset(self, indices: List[int]) -> Any: ) @staticmethod - def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate) -> Tuple[, List[int]]: + def compute_clips_for_video( + video_pts: torch.Tensor, + num_frames: int, + step: int, + fps: int, + frame_rate: Optional[int] = None + ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]: if fps is None: # if for some reason the video doesn't have fps (because doesn't have a video stream) # set the fps to 1. The value doesn't matter, because video_pts is empty anyway @@ -221,7 +232,7 @@ def compute_clips_for_video(video_pts, num_frames, step, fps, frame_rate) -> Tup idxs = unfold(idxs, num_frames, step) return clips, idxs - def compute_clips(self, num_frames: int, step, frame_rate: Optional[int] = None) -> None: + def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None: """ Compute all consecutive sequences of clips from video_pts. 
Always returns clips of size `num_frames`, meaning that the @@ -271,7 +282,7 @@ def get_clip_location(self, idx: int) -> Tuple[int, int]: return video_idx, clip_idx @staticmethod - def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Tensor: + def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Union[slice, torch.Tensor]: step = float(original_fps) / new_fps if step.is_integer(): # optimization: if step is integer, don't need to perform @@ -282,7 +293,7 @@ def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Ten idxs = idxs.floor().to(torch.int64) return idxs - def get_clip(self, idx: int) -> Tuple[Tensor, Tensor, VideoMetaData, int]: + def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any], int]: """ Gets a subclip from a list of videos. @@ -330,21 +341,21 @@ def get_clip(self, idx: int) -> Tuple[Tensor, Tensor, VideoMetaData, int]: end_pts = clip_pts[-1].item() video, audio, info = read_video(video_path, start_pts, end_pts) else: - info = _probe_video_from_file(video_path) - video_fps = info.video_fps + _info = _probe_video_from_file(video_path) + video_fps = _info.video_fps audio_fps = None - video_start_pts = clip_pts[0].item() - video_end_pts = clip_pts[-1].item() + video_start_pts: int = clip_pts[0].item() + video_end_pts: int = clip_pts[-1].item() audio_start_pts, audio_end_pts = 0, -1 audio_timebase = Fraction(0, 1) video_timebase = Fraction( - info.video_timebase.numerator, info.video_timebase.denominator + _info.video_timebase.numerator, _info.video_timebase.denominator ) - if info.has_audio: + if _info.has_audio: audio_timebase = Fraction( - info.audio_timebase.numerator, info.audio_timebase.denominator + _info.audio_timebase.numerator, _info.audio_timebase.denominator ) audio_start_pts = pts_convert( video_start_pts, video_timebase, audio_timebase, math.floor @@ -352,7 +363,7 @@ def get_clip(self, idx: int) -> Tuple[Tensor, Tensor, VideoMetaData, int]: audio_end_pts = pts_convert( video_end_pts, video_timebase, audio_timebase, math.ceil ) - audio_fps = info.audio_sample_rate + audio_fps = _info.audio_sample_rate video, audio, info = _read_video_from_file( video_path, video_width=self._video_width, From c40bac5d04d873374f7876bee58d3dd912ef621a Mon Sep 17 00:00:00 2001 From: frgfm Date: Tue, 13 Jul 2021 12:01:00 +0200 Subject: [PATCH 3/7] style: Fixed remaining typing annotations --- torchvision/datasets/video_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 84d65df4247..501c78d8b7e 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -21,7 +21,7 @@ def pts_convert( timebase_from: Fraction, timebase_to: Fraction, round_func: Callable = math.floor -) -> float: +) -> int: """convert pts between different time bases Args: pts: presentation timestamp, float @@ -145,8 +145,8 @@ def _compute_frame_pts(self) -> None: # so need to create a dummy dataset first import torch.utils.data - dl = torch.utils.data.DataLoader( - _VideoTimestampsDataset(self.video_paths), + dl: torch.utils.data.DataLoader = torch.utils.data.DataLoader( + _VideoTimestampsDataset(self.video_paths), # type: ignore[arg-type] batch_size=16, num_workers=self.num_workers, collate_fn=_collate_fn, @@ -227,10 +227,10 @@ def compute_clips_for_video( warnings.warn("There aren't enough frames in the current video to get a clip for the given clip length and " "frames 
between clips. The video (and potentially others) will be skipped.") if isinstance(idxs, slice): - idxs = [idxs] * len(clips) + idxs = [idxs] * len(clips) # type: ignore[assignment] else: idxs = unfold(idxs, num_frames, step) - return clips, idxs + return clips, idxs # type: ignore[return-value] def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None: """ @@ -345,8 +345,8 @@ def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any] video_fps = _info.video_fps audio_fps = None - video_start_pts: int = clip_pts[0].item() - video_end_pts: int = clip_pts[-1].item() + video_start_pts = int(clip_pts[0].item()) + video_end_pts = int(clip_pts[-1].item()) audio_start_pts, audio_end_pts = 0, -1 audio_timebase = Fraction(0, 1) @@ -402,10 +402,10 @@ def __getstate__(self) -> Dict[str, Any]: video_pts = [x.to(torch.int64) for x in self.video_pts] # video_pts can be an empty list if no frames have been decoded if video_pts: - video_pts = torch.cat(video_pts) + video_pts = torch.cat(video_pts) # type: ignore[assignment] # avoid bug in https://github.com/pytorch/pytorch/issues/32351 # TODO: Revert it once the bug is fixed. - video_pts = video_pts.numpy() + video_pts = video_pts.numpy() # type: ignore[attr-defined] # make a copy of the fields of self d = self.__dict__.copy() From d2e85eaa1b14102a32166c863ee9f786b83d9041 Mon Sep 17 00:00:00 2001 From: frgfm Date: Fri, 30 Jul 2021 10:42:57 +0200 Subject: [PATCH 4/7] style: Fixed typing --- torchvision/datasets/video_utils.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 501c78d8b7e..cd570773d2a 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -2,7 +2,7 @@ import math import warnings from fractions import Fraction -from typing import Any, Dict, List, Optional, Callable, Union, Tuple +from typing import Any, Dict, List, Optional, Callable, Union, Tuple, TypeVar, cast import torch from torchvision.io import ( @@ -15,6 +15,8 @@ from .utils import tqdm +T = TypeVar("T") + def pts_convert( pts: int, @@ -71,7 +73,7 @@ def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]: return read_video_timestamps(self.video_paths[idx]) -def _collate_fn(x: Any) -> Any: +def _collate_fn(x: T) -> T: """ Dummy collate function to be used with _VideoTimestampsDataset """ @@ -179,7 +181,7 @@ def metadata(self) -> Dict[str, Any]: } return _metadata - def subset(self, indices: List[int]) -> Any: + def subset(self, indices: List[int]) -> "VideoClips": video_paths = [self.video_paths[i] for i in indices] video_pts = [self.video_pts[i] for i in indices] video_fps = [self.video_fps[i] for i in indices] @@ -218,19 +220,20 @@ def compute_clips_for_video( if frame_rate is None: frame_rate = fps total_frames = len(video_pts) * (float(frame_rate) / fps) - idxs = VideoClips._resample_video_idx( + _idxs = VideoClips._resample_video_idx( int(math.floor(total_frames)), fps, frame_rate ) - video_pts = video_pts[idxs] + video_pts = video_pts[_idxs] clips = unfold(video_pts, num_frames, step) if not clips.numel(): warnings.warn("There aren't enough frames in the current video to get a clip for the given clip length and " "frames between clips. 
The video (and potentially others) will be skipped.") - if isinstance(idxs, slice): - idxs = [idxs] * len(clips) # type: ignore[assignment] + idxs: Union[List[slice], torch.Tensor] + if isinstance(_idxs, slice): + idxs = [_idxs] * len(clips) else: - idxs = unfold(idxs, num_frames, step) - return clips, idxs # type: ignore[return-value] + idxs = unfold(_idxs, num_frames, step) + return clips, idxs def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None: """ @@ -345,8 +348,8 @@ def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any] video_fps = _info.video_fps audio_fps = None - video_start_pts = int(clip_pts[0].item()) - video_end_pts = int(clip_pts[-1].item()) + video_start_pts = cast(int, clip_pts[0].item()) + video_end_pts = cast(int, clip_pts[-1].item()) audio_start_pts, audio_end_pts = 0, -1 audio_timebase = Fraction(0, 1) From a29a1c8302987a5d8e461ff0e4c351320c8c1e84 Mon Sep 17 00:00:00 2001 From: frgfm Date: Thu, 18 Nov 2021 10:16:01 +0100 Subject: [PATCH 5/7] style: Fixed typing --- torchvision/datasets/video_utils.py | 33 +++++++++++------------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 784ef78dd84..06a11f315cd 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -18,12 +18,7 @@ T = TypeVar("T") -def pts_convert( - pts: int, - timebase_from: Fraction, - timebase_to: Fraction, - round_func: Callable = math.floor -) -> int: +def pts_convert(pts: int, timebase_from: Fraction, timebase_to: Fraction, round_func: Callable = math.floor) -> int: """convert pts between different time bases Args: pts: presentation timestamp, float @@ -207,11 +202,7 @@ def subset(self, indices: List[int]) -> "VideoClips": @staticmethod def compute_clips_for_video( - video_pts: torch.Tensor, - num_frames: int, - step: int, - fps: int, - frame_rate: Optional[int] = None + video_pts: torch.Tensor, num_frames: int, step: int, fps: int, frame_rate: Optional[int] = None ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]: if fps is None: # if for some reason the video doesn't have fps (because doesn't have a video stream) @@ -220,7 +211,7 @@ def compute_clips_for_video( if frame_rate is None: frame_rate = fps total_frames = len(video_pts) * (float(frame_rate) / fps) - idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate) + _idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate) video_pts = video_pts[idxs] clips = unfold(video_pts, num_frames, step) if not clips.numel(): @@ -228,11 +219,11 @@ def compute_clips_for_video( "There aren't enough frames in the current video to get a clip for the given clip length and " "frames between clips. The video (and potentially others) will be skipped." 
) - # idxs: Union[List[slice], torch.Tensor] - if isinstance(idxs, slice): - idxs = [idxs] * len(clips) + idxs: Union[List[slice], torch.Tensor] + if isinstance(_idxs, slice): + idxs = [_idxs] * len(clips) else: - idxs = unfold(idxs, num_frames, step) + idxs = unfold(_idxs, num_frames, step) return clips, idxs def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None: @@ -344,13 +335,13 @@ def get_clip(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, Any] audio_start_pts, audio_end_pts = 0, -1 audio_timebase = Fraction(0, 1) - video_timebase = Fraction(info.video_timebase.numerator, info.video_timebase.denominator) - if info.has_audio: - audio_timebase = Fraction(info.audio_timebase.numerator, info.audio_timebase.denominator) + video_timebase = Fraction(_info.video_timebase.numerator, _info.video_timebase.denominator) + if _info.has_audio: + audio_timebase = Fraction(_info.audio_timebase.numerator, _info.audio_timebase.denominator) audio_start_pts = pts_convert(video_start_pts, video_timebase, audio_timebase, math.floor) audio_end_pts = pts_convert(video_end_pts, video_timebase, audio_timebase, math.ceil) - audio_fps = info.audio_sample_rate - video, audio, info = _read_video_from_file( + audio_fps = _info.audio_sample_rate + video, audio, _ = _read_video_from_file( video_path, video_width=self._video_width, video_height=self._video_height, From 01620d34099fc2bf42ff4211f3dcb24d23f47b3d Mon Sep 17 00:00:00 2001 From: frgfm Date: Thu, 18 Nov 2021 14:29:07 +0100 Subject: [PATCH 6/7] refactor: Removed unused import --- torchvision/datasets/video_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 06a11f315cd..38677afd1a3 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -10,7 +10,6 @@ _read_video_from_file, read_video, read_video_timestamps, - VideoMetaData, ) from .utils import tqdm From bad5f6185a8fdf9dffaebcd6a3aaf1eebc1fe7fd Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 18 Nov 2021 17:07:28 +0100 Subject: [PATCH 7/7] Update torchvision/datasets/video_utils.py --- torchvision/datasets/video_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 38677afd1a3..efa3836c8d1 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -211,7 +211,7 @@ def compute_clips_for_video( frame_rate = fps total_frames = len(video_pts) * (float(frame_rate) / fps) _idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate) - video_pts = video_pts[idxs] + video_pts = video_pts[_idxs] clips = unfold(video_pts, num_frames, step) if not clips.numel(): warnings.warn(
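
Note on the `_idxs`/`idxs` split that PATCH 4/7 introduces and PATCH 7/7 finishes (the `video_pts[_idxs]` fix above): `_resample_video_idx` is typed as returning `Union[slice, torch.Tensor]`, so `compute_clips_for_video` keeps the raw result in `_idxs` and only binds the annotated `idxs` after branching on `isinstance(_idxs, slice)`. Below is a minimal standalone sketch of that control flow; the arange-based resampling body and the `unfold` implementation are elided from these hunks, so both are reconstructed here as assumptions rather than copies of the library code.

import math
from typing import List, Union

import torch


def resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Union[slice, torch.Tensor]:
    # Mirrors the typed _resample_video_idx: when the fps ratio is an integer,
    # return a plain slice instead of materialising an index tensor.
    step = float(original_fps) / new_fps
    if step.is_integer():
        return slice(None, None, int(step))
    idxs = torch.arange(num_frames, dtype=torch.float32) * step
    return idxs.floor().to(torch.int64)


def unfold_1d(t: torch.Tensor, size: int, step: int, dilation: int = 1) -> torch.Tensor:
    # Stand-in for the patch's `unfold` helper (its body is not part of the diff):
    # windows of `size` elements whose starts are `step` apart and whose elements
    # are `dilation` apart, returned as a 2-D tensor.
    span = dilation * (size - 1) + 1
    n = max((t.numel() - span) // step + 1, 0)
    starts = torch.arange(n) * step
    offsets = torch.arange(size) * dilation
    return t[starts[:, None] + offsets[None, :]]


video_pts = torch.arange(0, 300, 10)  # 30 fake frame timestamps
fps, frame_rate = 30, 15
total_frames = int(math.floor(len(video_pts) * (float(frame_rate) / fps)))
_idxs = resample_video_idx(total_frames, fps, frame_rate)
clips = unfold_1d(video_pts[_idxs], size=4, step=4)

# Same branching as compute_clips_for_video: a slice is broadcast per clip,
# a tensor of frame indices is unfolded alongside the pts.
idxs: Union[List[slice], torch.Tensor]
if isinstance(_idxs, slice):
    idxs = [_idxs] * len(clips)
else:
    idxs = unfold_1d(_idxs, size=4, step=4)
print(clips.shape, idxs)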
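
The `pts_convert` signature introduced in PATCH 2/7 (Fraction time bases, a Callable rounding function) and tightened to return `int` in PATCH 3/7 is used by `get_clip` to map a clip's video start/end pts into the audio time base, flooring the start and ceiling the end. The sketch below illustrates that conversion; the function body is elided from the hunks, so the exact-rescaling formula is an assumption, and the time bases and pts values are made up for illustration.

import math
from fractions import Fraction
from typing import Callable


def pts_convert(pts: int, timebase_from: Fraction, timebase_to: Fraction, round_func: Callable = math.floor) -> int:
    # Assumed body: rescale the timestamp exactly with Fractions, then round.
    return round_func(Fraction(pts, 1) * timebase_from / timebase_to)


# e.g. a video stream with a 1/30000 time base and audio with a 1/44100 time base
video_tb = Fraction(1, 30000)
audio_tb = Fraction(1, 44100)

video_start_pts, video_end_pts = 1001, 31031  # hypothetical clip bounds in video pts

# get_clip floors the start and ceils the end so the audio span covers the whole clip
audio_start_pts = pts_convert(video_start_pts, video_tb, audio_tb, math.floor)
audio_end_pts = pts_convert(video_end_pts, video_tb, audio_tb, math.ceil)
print(audio_start_pts, audio_end_pts)  # 1471 45616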
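
PATCH 4/7 retypes `_collate_fn` with a TypeVar (`(x: T) -> T`): it is effectively an identity collate, there so the `DataLoader` in `_compute_frame_pts` can hand back the per-video `(timestamps, fps)` tuples untouched instead of trying to stack timestamp lists of different lengths. A toy sketch of the same pattern follows, with a made-up stand-in dataset in place of `_VideoTimestampsDataset` and of the elided batch-unpacking lines.

from typing import List, Tuple, TypeVar

import torch
from torch.utils.data import DataLoader, Dataset

T = TypeVar("T")


def identity_collate(batch: T) -> T:
    # Same idea as the patch's _collate_fn: return the list of per-item
    # results unchanged, since variable-length timestamp lists cannot be
    # stacked by the default collate function.
    return batch


class FakeTimestampsDataset(Dataset):
    # Toy stand-in for _VideoTimestampsDataset: each "video" yields a
    # different number of timestamps plus an fps value.
    def __init__(self, lengths: List[int]) -> None:
        self.lengths = lengths

    def __len__(self) -> int:
        return len(self.lengths)

    def __getitem__(self, idx: int) -> Tuple[List[int], float]:
        return list(range(self.lengths[idx])), 30.0


dl: DataLoader = DataLoader(
    FakeTimestampsDataset([3, 5, 2, 7]),
    batch_size=2,
    num_workers=0,
    collate_fn=identity_collate,
)

video_pts, video_fps = [], []
for batch in dl:
    pts, fps = list(zip(*batch))
    video_pts.extend(torch.as_tensor(p, dtype=torch.int64) for p in pts)
    video_fps.extend(fps)
print([t.tolist() for t in video_pts], video_fps)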