Skip to content

Added typing annotations to io/_video_opts #4173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
29a1c92
style: Added typing annotations
frgfm Jul 13, 2021
add76b3
Merge branch 'master' into video-opt-typing
pmeier Jul 15, 2021
a3f9748
style: Fixed lint
frgfm Jul 19, 2021
7678445
Merge branch 'master' into video-opt-typing
frgfm Jul 19, 2021
eeadcab
Merge branch 'video-opt-typing' of https://github.com/frgfm/vision in…
frgfm Jul 19, 2021
8c49207
Merge branch 'master' into video-opt-typing
frgfm Jul 20, 2021
0797f2f
style: Fixed typing
frgfm Jul 31, 2021
496984c
chore: Updated mypy.ini
frgfm Jul 31, 2021
f6489b9
style: Fixed typing
frgfm Jul 31, 2021
a2483c4
Merge branch 'master' into video-opt-typing
frgfm Jul 31, 2021
a6fe091
chore: Updated mypy.ini
frgfm Aug 3, 2021
c23cf7d
style: Fixed typing compatibility with jit
frgfm Aug 3, 2021
da0949f
Merge branch 'master' into video-opt-typing
pmeier Aug 16, 2021
60f5bfe
style: Fixed typing
frgfm Aug 23, 2021
752015e
Merge branch 'master' into video-opt-typing
frgfm Aug 23, 2021
aff69b1
Merge branch 'video-opt-typing' of https://github.com/frgfm/vision in…
frgfm Aug 23, 2021
ce47bb1
style: Fixed typing
frgfm Sep 18, 2021
6cfabdc
style: Fixed missing import
frgfm Sep 18, 2021
56bd64c
Merge branch 'main' into video-opt-typing
frgfm Sep 18, 2021
642fa6d
style: Fixed typing of __iter__
frgfm Sep 20, 2021
a943e73
Merge branch 'main' into video-opt-typing
frgfm Nov 16, 2021
a4a49f9
style: Fixed typing
frgfm Nov 16, 2021
cd6fb5a
Merge branch 'main' into video-opt-typing
frgfm Nov 16, 2021
f6e7e03
style: Fixed lint
frgfm Nov 17, 2021
0aab926
style: Finished typing
frgfm Nov 17, 2021
4c5246c
style: ufmt the file
frgfm Nov 17, 2021
8a9dab3
Merge branch 'main' into video-opt-typing
prabhat00155 Nov 17, 2021
c65cb53
style: Removed unnecessary typing
frgfm Nov 17, 2021
b6aca1f
style: Fixed typing of iterator
frgfm Nov 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ warn_unreachable = True
; miscellaneous strictness flags
allow_redefinition = True

[mypy-torchvision.io._video_opt.*]
[mypy-torchvision.io.image.*]

ignore_errors = True

[mypy-torchvision.io.*]
[mypy-torchvision.io.video.*]

ignore_errors = True

Expand Down
2 changes: 1 addition & 1 deletion torchvision/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __next__(self) -> Dict[str, Any]:
raise StopIteration
return {"data": frame, "pts": pts}

def __iter__(self) -> Iterator["VideoReader"]:
def __iter__(self) -> Iterator[Dict[str, Any]]:
return self

def seek(self, time_s: float, keyframes_only: bool = False) -> "VideoReader":
Expand Down
123 changes: 70 additions & 53 deletions torchvision/io/_video_opt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import math
import warnings
from fractions import Fraction
from typing import List, Tuple
from typing import List, Tuple, Dict, Optional, Union

import torch

Expand All @@ -26,10 +26,9 @@ class Timebase:

def __init__(
self,
numerator, # type: int
denominator, # type: int
):
# type: (...) -> None
numerator: int,
denominator: int,
) -> None:
self.numerator = numerator
self.denominator = denominator

Expand All @@ -56,7 +55,7 @@ class VideoMetaData:
"audio_sample_rate",
]

def __init__(self):
def __init__(self) -> None:
self.has_video = False
self.video_timebase = Timebase(0, 1)
self.video_duration = 0.0
Expand All @@ -67,8 +66,7 @@ def __init__(self):
self.audio_sample_rate = 0.0


def _validate_pts(pts_range):
# type: (List[int]) -> None
def _validate_pts(pts_range: Tuple[int, int]) -> None:

if pts_range[1] > 0:
assert (
Expand All @@ -80,8 +78,14 @@ def _validate_pts(pts_range):
)


def _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration):
# type: (torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor,torch.Tensor) -> VideoMetaData
def _fill_info(
vtimebase: torch.Tensor,
vfps: torch.Tensor,
vduration: torch.Tensor,
atimebase: torch.Tensor,
asample_rate: torch.Tensor,
aduration: torch.Tensor,
) -> VideoMetaData:
"""
Build update VideoMetaData struct with info about the video
"""
Expand All @@ -106,8 +110,9 @@ def _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration):
return meta


def _align_audio_frames(aframes, aframe_pts, audio_pts_range):
# type: (torch.Tensor, torch.Tensor, List[int]) -> torch.Tensor
def _align_audio_frames(
aframes: torch.Tensor, aframe_pts: torch.Tensor, audio_pts_range: Tuple[int, int]
) -> torch.Tensor:
start, end = aframe_pts[0], aframe_pts[-1]
num_samples = aframes.size(0)
step_per_aframe = float(end - start + 1) / float(num_samples)
Expand All @@ -121,21 +126,21 @@ def _align_audio_frames(aframes, aframe_pts, audio_pts_range):


def _read_video_from_file(
filename,
seek_frame_margin=0.25,
read_video_stream=True,
video_width=0,
video_height=0,
video_min_dimension=0,
video_max_dimension=0,
video_pts_range=(0, -1),
video_timebase=default_timebase,
read_audio_stream=True,
audio_samples=0,
audio_channels=0,
audio_pts_range=(0, -1),
audio_timebase=default_timebase,
):
filename: str,
seek_frame_margin: float = 0.25,
read_video_stream: bool = True,
video_width: int = 0,
video_height: int = 0,
video_min_dimension: int = 0,
video_max_dimension: int = 0,
video_pts_range: Tuple[int, int] = (0, -1),
video_timebase: Fraction = default_timebase,
read_audio_stream: bool = True,
audio_samples: int = 0,
audio_channels: int = 0,
audio_pts_range: Tuple[int, int] = (0, -1),
audio_timebase: Fraction = default_timebase,
) -> Tuple[torch.Tensor, torch.Tensor, VideoMetaData]:
"""
Reads a video from a file, returning both the video frames as well as
the audio frames
Expand Down Expand Up @@ -217,7 +222,7 @@ def _read_video_from_file(
return vframes, aframes, info


def _read_video_timestamps_from_file(filename):
def _read_video_timestamps_from_file(filename: str) -> Tuple[List[int], List[int], VideoMetaData]:
"""
Decode all video- and audio frames in the video. Only pts
(presentation timestamp) is returned. The actual frame pixel data is not
Expand Down Expand Up @@ -252,7 +257,7 @@ def _read_video_timestamps_from_file(filename):
return vframe_pts, aframe_pts, info


def _probe_video_from_file(filename):
def _probe_video_from_file(filename: str) -> VideoMetaData:
"""
Probe a video file and return VideoMetaData with info about the video
"""
Expand All @@ -263,24 +268,23 @@ def _probe_video_from_file(filename):


def _read_video_from_memory(
video_data, # type: torch.Tensor
seek_frame_margin=0.25, # type: float
read_video_stream=1, # type: int
video_width=0, # type: int
video_height=0, # type: int
video_min_dimension=0, # type: int
video_max_dimension=0, # type: int
video_pts_range=(0, -1), # type: List[int]
video_timebase_numerator=0, # type: int
video_timebase_denominator=1, # type: int
read_audio_stream=1, # type: int
audio_samples=0, # type: int
audio_channels=0, # type: int
audio_pts_range=(0, -1), # type: List[int]
audio_timebase_numerator=0, # type: int
audio_timebase_denominator=1, # type: int
):
# type: (...) -> Tuple[torch.Tensor, torch.Tensor]
video_data: torch.Tensor,
seek_frame_margin: float = 0.25,
read_video_stream: int = 1,
video_width: int = 0,
video_height: int = 0,
video_min_dimension: int = 0,
video_max_dimension: int = 0,
video_pts_range: Tuple[int, int] = (0, -1),
video_timebase_numerator: int = 0,
video_timebase_denominator: int = 1,
read_audio_stream: int = 1,
audio_samples: int = 0,
audio_channels: int = 0,
audio_pts_range: Tuple[int, int] = (0, -1),
audio_timebase_numerator: int = 0,
audio_timebase_denominator: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Reads a video from memory, returning both the video frames as well as
the audio frames
Expand Down Expand Up @@ -370,7 +374,9 @@ def _read_video_from_memory(
return vframes, aframes


def _read_video_timestamps_from_memory(video_data):
def _read_video_timestamps_from_memory(
video_data: torch.Tensor,
) -> Tuple[List[int], List[int], VideoMetaData]:
"""
Decode all frames in the video. Only pts (presentation timestamp) is returned.
The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
Expand Down Expand Up @@ -407,8 +413,9 @@ def _read_video_timestamps_from_memory(video_data):
return vframe_pts, aframe_pts, info


def _probe_video_from_memory(video_data):
# type: (torch.Tensor) -> VideoMetaData
def _probe_video_from_memory(
video_data: torch.Tensor,
) -> VideoMetaData:
"""
Probe a video in memory and return VideoMetaData with info about the video
This function is torchscriptable
Expand All @@ -421,15 +428,22 @@ def _probe_video_from_memory(video_data):
return info


def _convert_to_sec(start_pts, end_pts, pts_unit, time_base):
def _convert_to_sec(
start_pts: Union[float, Fraction], end_pts: Union[float, Fraction], pts_unit: str, time_base: Fraction
) -> Tuple[Union[float, Fraction], Union[float, Fraction], str]:
if pts_unit == "pts":
start_pts = float(start_pts * time_base)
end_pts = float(end_pts * time_base)
pts_unit = "sec"
return start_pts, end_pts, pts_unit


def _read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"):
def _read_video(
filename: str,
start_pts: Union[float, Fraction] = 0,
end_pts: Optional[Union[float, Fraction]] = None,
pts_unit: str = "pts",
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, float]]:
if end_pts is None:
end_pts = float("inf")

Expand Down Expand Up @@ -495,13 +509,16 @@ def get_pts(time_base):
return vframes, aframes, _info


def _read_video_timestamps(filename, pts_unit="pts"):
def _read_video_timestamps(
filename: str, pts_unit: str = "pts"
) -> Tuple[Union[List[int], List[Fraction]], Optional[float]]:
if pts_unit == "pts":
warnings.warn(
"The pts_unit 'pts' gives wrong results and will be removed in a "
+ "follow-up version. Please use pts_unit 'sec'."
)

pts: Union[List[int], List[Fraction]]
pts, _, info = _read_video_timestamps_from_file(filename)

if pts_unit == "sec":
Expand Down