Skip to content

Commit 9e3a219

Browse files
ahmadsharif1 and facebook-github-bot
authored and committed
[torchcodec] Add properties for duration and average_fps (#122)
Summary: Pull Request resolved: #122 These properties use the content data if possible. Otherwise they fall back to the header data. Reviewed By: NicolasHug Differential Revision: D60384168 fbshipit-source-id: e1a6cd10652d9000d306ec4400e736b8dd338cf5
1 parent 78d2acb commit 9e3a219

File tree

4 files changed

+84
-23
lines changed

4 files changed

+84
-23
lines changed

src/torchcodec/decoders/_core/_metadata.py

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
class VideoStreamMetadata:
2424
"""Metadata of a single video stream."""
2525

26-
duration_seconds: Optional[float]
27-
"""Duration of the stream, in seconds (float or None)."""
26+
duration_seconds_from_header: Optional[float]
27+
"""Duration of the stream, in seconds obtained from the header (float or
28+
None). This could be inaccurate."""
2829
bit_rate: Optional[float]
2930
"""Bit rate of the stream, in bits per second (float or None)."""
3031
num_frames_from_header: Optional[int]
@@ -36,17 +37,27 @@ class VideoStreamMetadata:
3637
content (the scan doesn't involve decoding). This is more accurate
3738
than ``num_frames_from_header``. We recommend using the
3839
``num_frames`` attribute instead. (int or None)."""
39-
min_pts_seconds: Optional[float]
40-
"""Minimum :term:`pts` of any frame in the stream (float or None)."""
41-
max_pts_seconds: Optional[float]
42-
"""Maximum :term:`pts` of any frame in the stream (float or None)."""
40+
begin_stream_from_content_seconds: Optional[float]
41+
"""Beginning of the stream in seconds (float or None).
42+
This is min(frame.pts) for all frames in this stream."""
43+
end_stream_from_content_seconds: Optional[float]
44+
"""End of the stream in seconds (float or None).
45+
This is max(frame.pts + frame.duration) for all frames in this stream.
46+
Note that frames have a pts and duration and the interval defined by
47+
[pts, pts + duration) is a half-open interval (the right boundary is open).
48+
Therefore no frame is displayed at this time value.
49+
Calling
50+
SimpleVideoDecoder.get_frame_displayed_at(end_stream_from_content_seconds)
51+
will raise an IndexError exception.
52+
If you want to get the last frame you can use [-1] on a SimpleVideoDecoder
53+
object."""
4354
codec: Optional[str]
4455
"""Codec (str or None)."""
4556
width: Optional[int]
4657
"""Width of the frames (int or None)."""
4758
height: Optional[int]
4859
"""Height of the frames (int or None)."""
49-
average_fps: Optional[float]
60+
average_fps_from_header: Optional[float]
5061
"""Average fps of the stream (float or None)."""
5162
stream_index: int
5263
"""Index of the stream within the video (int)."""
@@ -62,11 +73,46 @@ def num_frames(self) -> Optional[int]:
6273
else:
6374
return self.num_frames_from_header
6475

76+
@property
77+
def duration_seconds(self) -> Optional[float]:
78+
"""Duration of the stream in seconds. We try to calculate the duration
79+
from the actual frames if we scanned the frames. Otherwise we fall back
80+
to the duration obtained from the header.
81+
"""
82+
if (
83+
self.end_stream_from_content_seconds is None
84+
or self.begin_stream_from_content_seconds is None
85+
):
86+
return self.duration_seconds_from_header
87+
return (
88+
self.end_stream_from_content_seconds
89+
- self.begin_stream_from_content_seconds
90+
)
91+
92+
@property
93+
def average_fps(self) -> Optional[float]:
94+
"""Average fps of the stream. We try to get the average fps from the
95+
actual frames if we scanned the frames. Otherwise we fall back to the
96+
fps obtained from the header.
97+
"""
98+
if (
99+
self.end_stream_from_content_seconds is None
100+
or self.begin_stream_from_content_seconds is None
101+
or self.num_frames is None
102+
):
103+
return self.average_fps_from_header
104+
return self.num_frames / (
105+
self.end_stream_from_content_seconds
106+
- self.begin_stream_from_content_seconds
107+
)
108+
65109
def __repr__(self):
66-
# Overridden because `num_frames` wouldn't be printed by default.
110+
# Overridden because properties are not printed by default.
67111
s = self.__class__.__name__ + ":\n"
68112
spaces = " "
69113
s += f"{spaces}num_frames: {self.num_frames}\n"
114+
s += f"{spaces}duration_seconds: {self.duration_seconds}\n"
115+
s += f"{spaces}average_fps: {self.average_fps}\n"
70116
for field in dataclasses.fields(self):
71117
s += f"{spaces}{field.name}: {getattr(self, field.name)}\n"
72118
return s
@@ -109,18 +155,22 @@ def get_video_metadata(decoder: torch.Tensor) -> VideoMetadata:
109155
stream_dict = json.loads(_get_stream_json_metadata(decoder, stream_index))
110156
streams_metadata.append(
111157
VideoStreamMetadata(
112-
duration_seconds=stream_dict.get("durationSeconds"),
158+
duration_seconds_from_header=stream_dict.get("durationSeconds"),
113159
bit_rate=stream_dict.get("bitRate"),
114160
# TODO_OPEN_ISSUE: We should align the C++ names and the json
115161
# keys with the Python names
116162
num_frames_from_header=stream_dict.get("numFrames"),
117163
num_frames_from_content=stream_dict.get("numFramesFromScan"),
118-
min_pts_seconds=stream_dict.get("minPtsSecondsFromScan"),
119-
max_pts_seconds=stream_dict.get("maxPtsSecondsFromScan"),
164+
begin_stream_from_content_seconds=stream_dict.get(
165+
"minPtsSecondsFromScan"
166+
),
167+
end_stream_from_content_seconds=stream_dict.get(
168+
"maxPtsSecondsFromScan"
169+
),
120170
codec=stream_dict.get("codec"),
121171
width=stream_dict.get("width"),
122172
height=stream_dict.get("height"),
123-
average_fps=stream_dict.get("averageFps"),
173+
average_fps_from_header=stream_dict.get("averageFps"),
124174
stream_index=stream_index,
125175
)
126176
)

src/torchcodec/decoders/_simple_video_decoder.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -140,19 +140,23 @@ def __init__(
140140
)
141141
self._num_frames = self.metadata.num_frames_from_content
142142

143-
if self.metadata.min_pts_seconds is None:
143+
if self.metadata.begin_stream_from_content_seconds is None:
144144
raise ValueError(
145145
"The minimum pts value in seconds is unknown. "
146146
+ _ERROR_REPORTING_INSTRUCTIONS
147147
)
148-
self._min_pts_seconds = self.metadata.min_pts_seconds
148+
self._begin_stream_from_content_seconds = (
149+
self.metadata.begin_stream_from_content_seconds
150+
)
149151

150-
if self.metadata.max_pts_seconds is None:
152+
if self.metadata.end_stream_from_content_seconds is None:
151153
raise ValueError(
152154
"The maximum pts value in seconds is unknown. "
153155
+ _ERROR_REPORTING_INSTRUCTIONS
154156
)
155-
self._max_pts_seconds = self.metadata.max_pts_seconds
157+
self._end_stream_from_content_seconds = (
158+
self.metadata.end_stream_from_content_seconds
159+
)
156160

157161
def __len__(self) -> int:
158162
return self._num_frames
@@ -270,11 +274,15 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
270274
Returns:
271275
Frame: The frame that is displayed at ``seconds``.
272276
"""
273-
if not self._min_pts_seconds <= seconds < self._max_pts_seconds:
277+
if (
278+
not self._begin_stream_from_content_seconds
279+
<= seconds
280+
< self._end_stream_from_content_seconds
281+
):
274282
raise IndexError(
275283
f"Invalid pts in seconds: {seconds}. "
276-
f"It must be greater than or equal to {self._min_pts_seconds} "
277-
f"and less than or equal to {self._max_pts_seconds}."
284+
f"It must be greater than or equal to {self._begin_stream_from_content_seconds} "
285+
f"and less than {self._end_stream_from_content_seconds}."
278286
)
279287
data, pts_seconds, duration_seconds = core.get_frame_at_pts(
280288
self._decoder, seconds

test/decoders/test_metadata.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,16 +88,16 @@ def test_num_frames_fallback(
8888
):
8989
"""Check that num_frames_from_content always has priority when accessing `.num_frames`"""
9090
metadata = VideoStreamMetadata(
91-
duration_seconds=4,
91+
duration_seconds_from_header=4,
9292
bit_rate=123,
9393
num_frames_from_header=num_frames_from_header,
9494
num_frames_from_content=num_frames_from_content,
95-
min_pts_seconds=0,
96-
max_pts_seconds=4,
95+
begin_stream_from_content_seconds=0,
96+
end_stream_from_content_seconds=4,
9797
codec="whatever",
9898
width=123,
9999
height=321,
100-
average_fps=30,
100+
average_fps_from_header=30,
101101
stream_index=0,
102102
)
103103

test/decoders/test_simple_video_decoder.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ def test_create(self, source_kind):
3737
== 390
3838
)
3939
assert decoder._stream_index == decoder.metadata.stream_index == 3
40+
assert decoder.metadata.duration_seconds == pytest.approx(13.013)
41+
assert decoder.metadata.average_fps == pytest.approx(29.970029)
42+
assert decoder.metadata.num_frames == 390
4043

4144
def test_create_fails(self):
4245
with pytest.raises(TypeError, match="Unknown source type"):

0 commit comments

Comments
 (0)