Skip to content

Commit 7cb2271

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into sample_rate
2 parents 975b0fb + ae19a78 commit 7cb2271

File tree

6 files changed

+210
-3
lines changed

6 files changed

+210
-3
lines changed

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "src/torchcodec/decoders/_core/VideoDecoder.h"
88
#include <cstdint>
99
#include <cstdio>
10+
#include <cstdlib>
1011
#include <iostream>
1112
#include <limits>
1213
#include <sstream>
@@ -67,7 +68,7 @@ std::vector<std::string> splitStringWithDelimiters(
6768

6869
VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode)
6970
: seekMode_(seekMode) {
70-
av_log_set_level(AV_LOG_QUIET);
71+
setFFmpegLogLevel();
7172

7273
AVFormatContext* rawContext = nullptr;
7374
int status =
@@ -86,7 +87,7 @@ VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode)
8687
: seekMode_(seekMode) {
8788
TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
8889

89-
av_log_set_level(AV_LOG_QUIET);
90+
setFFmpegLogLevel();
9091

9192
constexpr int bufferSize = 64 * 1024;
9293
ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize));
@@ -206,6 +207,39 @@ void VideoDecoder::initializeDecoder() {
206207
initialized_ = true;
207208
}
208209

210+
void VideoDecoder::setFFmpegLogLevel() {
211+
auto logLevel = AV_LOG_QUIET;
212+
const char* logLevelEnv = std::getenv("TORCHCODEC_FFMPEG_LOG_LEVEL");
213+
if (logLevelEnv != nullptr) {
214+
if (std::strcmp(logLevelEnv, "QUIET") == 0) {
215+
logLevel = AV_LOG_QUIET;
216+
} else if (std::strcmp(logLevelEnv, "PANIC") == 0) {
217+
logLevel = AV_LOG_PANIC;
218+
} else if (std::strcmp(logLevelEnv, "FATAL") == 0) {
219+
logLevel = AV_LOG_FATAL;
220+
} else if (std::strcmp(logLevelEnv, "ERROR") == 0) {
221+
logLevel = AV_LOG_ERROR;
222+
} else if (std::strcmp(logLevelEnv, "WARNING") == 0) {
223+
logLevel = AV_LOG_WARNING;
224+
} else if (std::strcmp(logLevelEnv, "INFO") == 0) {
225+
logLevel = AV_LOG_INFO;
226+
} else if (std::strcmp(logLevelEnv, "VERBOSE") == 0) {
227+
logLevel = AV_LOG_VERBOSE;
228+
} else if (std::strcmp(logLevelEnv, "DEBUG") == 0) {
229+
logLevel = AV_LOG_DEBUG;
230+
} else if (std::strcmp(logLevelEnv, "TRACE") == 0) {
231+
logLevel = AV_LOG_TRACE;
232+
} else {
233+
TORCH_CHECK(
234+
false,
235+
"Invalid TORCHCODEC_FFMPEG_LOG_LEVEL: ",
236+
logLevelEnv,
237+
". Use e.g. 'QUIET', 'PANIC', 'VERBOSE', etc.");
238+
}
239+
}
240+
av_log_set_level(logLevel);
241+
}
242+
209243
int VideoDecoder::getBestStreamIndex(AVMediaType mediaType) {
210244
AVCodecOnlyUseForCallingAVFindBestStream avCodec = nullptr;
211245
int streamIndex =
@@ -1750,7 +1784,10 @@ void VideoDecoder::createSwrContext(
17501784
TORCH_CHECK(
17511785
status == AVSUCCESS,
17521786
"Couldn't initialize SwrContext: ",
1753-
getFFMPEGErrorStringFromErrorCode(status));
1787+
getFFMPEGErrorStringFromErrorCode(status),
1788+
". If the error says 'Invalid argument', it's likely that you are using "
1789+
"a buggy FFmpeg version. FFmpeg4 is known to fail here in some "
1790+
"valid scenarios. Try to upgrade FFmpeg?");
17541791
streamInfo.swrContext.reset(swrContext);
17551792
}
17561793

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ class VideoDecoder {
363363
// --------------------------------------------------------------------------
364364

365365
void initializeDecoder();
366+
void setFFmpegLogLevel();
366367
// --------------------------------------------------------------------------
367368
// DECODING APIS AND RELATED UTILS
368369
// --------------------------------------------------------------------------

test/decoders/test_decoders.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
NASA_AUDIO,
2626
NASA_AUDIO_MP3,
2727
NASA_VIDEO,
28+
SINE_MONO_S16,
2829
SINE_MONO_S32,
2930
SINE_MONO_S32_44100,
3031
SINE_MONO_S32_8000,
@@ -1155,3 +1156,24 @@ def test_sample_rate_conversion(self, start_seconds, stop_seconds):
11551156
atol=atol,
11561157
rtol=rtol,
11571158
)
1159+
1160+
def test_s16_ffmpeg4_bug(self):
    # Decoding the s16 asset is expected to raise on FFmpeg 4 and to succeed
    # on every other major version. Debug logs from the failing case show:
    # [SWR @ 0x560a7abdaf80] Input channel count and layout are unset
    # which appears to come from:
    # https://github.com/FFmpeg/FFmpeg/blob/40a6963fbd0c47be358a3760480180b7b532e1e9/libswresample/swresample.c#L293-L305

    s16_asset = SINE_MONO_S16
    decoder = AudioDecoder(s16_asset.path)

    # Metadata must be readable regardless of the FFmpeg version.
    assert decoder.metadata.sample_rate == s16_asset.sample_rate
    assert decoder.metadata.sample_format == s16_asset.sample_format

    # Only the actual sample decoding is version-dependent.
    if get_ffmpeg_major_version() == 4:
        expectation = pytest.raises(RuntimeError, match="Invalid argument")
    else:
        expectation = contextlib.nullcontext()

    with expectation:
        decoder.get_samples_played_in_range(start_seconds=0)

test/resources/sine_mono_s16.wav

125 KB
Binary file not shown.
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
[
2+
{
3+
"duration_time": "0.128000",
4+
"pts_time": "0.000000"
5+
},
6+
{
7+
"duration_time": "0.128000",
8+
"pts_time": "0.128000"
9+
},
10+
{
11+
"duration_time": "0.128000",
12+
"pts_time": "0.256000"
13+
},
14+
{
15+
"duration_time": "0.128000",
16+
"pts_time": "0.384000"
17+
},
18+
{
19+
"duration_time": "0.128000",
20+
"pts_time": "0.512000"
21+
},
22+
{
23+
"duration_time": "0.128000",
24+
"pts_time": "0.640000"
25+
},
26+
{
27+
"duration_time": "0.128000",
28+
"pts_time": "0.768000"
29+
},
30+
{
31+
"duration_time": "0.128000",
32+
"pts_time": "0.896000"
33+
},
34+
{
35+
"duration_time": "0.128000",
36+
"pts_time": "1.024000"
37+
},
38+
{
39+
"duration_time": "0.128000",
40+
"pts_time": "1.152000"
41+
},
42+
{
43+
"duration_time": "0.128000",
44+
"pts_time": "1.280000"
45+
},
46+
{
47+
"duration_time": "0.128000",
48+
"pts_time": "1.408000"
49+
},
50+
{
51+
"duration_time": "0.128000",
52+
"pts_time": "1.536000"
53+
},
54+
{
55+
"duration_time": "0.128000",
56+
"pts_time": "1.664000"
57+
},
58+
{
59+
"duration_time": "0.128000",
60+
"pts_time": "1.792000"
61+
},
62+
{
63+
"duration_time": "0.128000",
64+
"pts_time": "1.920000"
65+
},
66+
{
67+
"duration_time": "0.128000",
68+
"pts_time": "2.048000"
69+
},
70+
{
71+
"duration_time": "0.128000",
72+
"pts_time": "2.176000"
73+
},
74+
{
75+
"duration_time": "0.128000",
76+
"pts_time": "2.304000"
77+
},
78+
{
79+
"duration_time": "0.128000",
80+
"pts_time": "2.432000"
81+
},
82+
{
83+
"duration_time": "0.128000",
84+
"pts_time": "2.560000"
85+
},
86+
{
87+
"duration_time": "0.128000",
88+
"pts_time": "2.688000"
89+
},
90+
{
91+
"duration_time": "0.128000",
92+
"pts_time": "2.816000"
93+
},
94+
{
95+
"duration_time": "0.128000",
96+
"pts_time": "2.944000"
97+
},
98+
{
99+
"duration_time": "0.128000",
100+
"pts_time": "3.072000"
101+
},
102+
{
103+
"duration_time": "0.128000",
104+
"pts_time": "3.200000"
105+
},
106+
{
107+
"duration_time": "0.128000",
108+
"pts_time": "3.328000"
109+
},
110+
{
111+
"duration_time": "0.128000",
112+
"pts_time": "3.456000"
113+
},
114+
{
115+
"duration_time": "0.128000",
116+
"pts_time": "3.584000"
117+
},
118+
{
119+
"duration_time": "0.128000",
120+
"pts_time": "3.712000"
121+
},
122+
{
123+
"duration_time": "0.128000",
124+
"pts_time": "3.840000"
125+
},
126+
{
127+
"duration_time": "0.032000",
128+
"pts_time": "3.968000"
129+
}
130+
]

test/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,23 @@ def sample_format(self) -> str:
496496
},
497497
)
498498

499+
# Test asset: sine_mono_s16.wav, declared below as a single-channel, 16 kHz
# stream with "s16" sample format.
# NOTE(review): num_frames=63 cannot be checked from this definition alone, and
# the frame-info JSON added in the same commit appears to list 32 entries --
# confirm which count this field is meant to hold.
# Same sample rate as SINE_MONO_S32, but encoded as s16 instead of s32. Generated with:
500+
# ffmpeg -i test/resources/sine_mono_s32.wav -ar 16000 -c:a pcm_s16le test/resources/sine_mono_s16.wav
501+
SINE_MONO_S16 = TestAudio(
502+
filename="sine_mono_s16.wav",
503+
default_stream_index=0,
504+
frames={}, # Automatically loaded from json file
505+
stream_infos={
506+
0: TestAudioStreamInfo(
507+
sample_rate=16_000,
508+
num_channels=1,
509+
duration_seconds=4,
510+
num_frames=63,
511+
sample_format="s16",
512+
)
513+
},
514+
)
515+
499516
H265_VIDEO = TestVideo(
500517
filename="h265_video.mp4",
501518
default_stream_index=0,

0 commit comments

Comments
 (0)