Skip to content

Commit 635406c

Browse files
andfoy and fmassa
authored
PR: Add PyTorch FFmpeg to wheel and conda distributions (#2596)
* Add PyTorch FFmpeg to wheel and conda distributions * Try to install wget from conda * Add yq flag on Mac * Correct copy instructions * Use cURL on Windows * Call bzip2 directly due to msys2/MSYS2-packages#1548 * Copy ffmpeg binaries to system-wide directories * Try to use std:c++17 on Windows * Try to define ssize_t on Windows * Use C++14 * Declare AVRational structs explicitly * Initialize AVRational explicitly * Replace macro to prevent errors on Windows * Replace AV_TIME_BASE_Q * Add library paths for video extension * Force ffmpeg from pytorch channels? * Fix clang style warnings * Update CONDA_CHANNEL_FLAGS * Fix clang style issues * Update unittest * Use FFmpeg 4.2 * Install correct version on Mac * Pin av version to 8.0.0 * Fix string formatting issue * Fix pip pinning * Try with 8.0.1 * Use av 8.0.2 * Remove trailing whitespace * Disable test_io_opt.py * Disable test_datasets_video_utils Co-authored-by: Francisco Massa <[email protected]>
1 parent 2b2dedc commit 635406c

File tree

15 files changed

+85
-31
lines changed

15 files changed

+85
-31
lines changed

.circleci/unittest/linux/scripts/environment.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
channels:
2+
- pytorch
23
- defaults
34
dependencies:
45
- numpy
@@ -8,6 +9,7 @@ dependencies:
89
- pip
910
- libpng
1011
- jpeg
12+
- ffmpeg=4.2
1113
- ca-certificates
1214
- pip:
1315
- future

.circleci/unittest/windows/scripts/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
channels:
2+
- pytorch
23
- defaults
34
dependencies:
45
- numpy

packaging/build_wheel.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ else
3232
cp "/usr/lib64/libjpeg.so" torchvision
3333
fi
3434

35+
download_copy_ffmpeg
36+
3537
if [[ "$OSTYPE" == "msys" ]]; then
3638
IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel
3739
else

packaging/conda/build_vision.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ else
127127
fi
128128

129129
if [[ -z "$PYTORCH_VERSION" ]]; then
130-
export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
130+
export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch"
131131
export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
132132
python -c "import os, sys, json, re; cuver = '$cuver'; \
133133
cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \

packaging/pkg_helpers.bash

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ setup_pip_pytorch_version() {
240240
# You MUST have populated PYTORCH_VERSION_SUFFIX before hand.
241241
setup_conda_pytorch_constraint() {
242242
if [[ -z "$PYTORCH_VERSION" ]]; then
243-
export CONDA_CHANNEL_FLAGS="-c pytorch-nightly"
243+
export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch"
244244
export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \
245245
python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \
246246
cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \
@@ -350,3 +350,39 @@ setup_junit_results_folder() {
350350
export CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY="${SOURCE_ROOT_DIR}/build_results/results.xml"
351351
fi
352352
}
353+
354+
355+
download_copy_ffmpeg() {
356+
mkdir ffmpeg_tmp
357+
cd ffmpeg_tmp
358+
if [[ "$OSTYPE" == "msys" ]]; then
359+
# conda install -yq ffmpeg -c pytorch
360+
# curl -L -q https://anaconda.org/pytorch/ffmpeg/4.3/download/win-64/ffmpeg-4.3-ha925a31_0.tar.bz2 --output ffmpeg-4.3-ha925a31_0.tar.bz2
361+
# bzip2 --decompress --stdout ffmpeg-4.3-ha925a31_0.tar.bz2 | tar -x --file=-
362+
# cp Library/bin/*.dll ../torchvision
363+
echo "FFmpeg is disabled currently on Windows"
364+
else
365+
if [[ "$(uname)" == Darwin ]]; then
366+
conda install -yq ffmpeg=4.2 -c pytorch
367+
conda install -yq wget
368+
wget -q https://anaconda.org/pytorch/ffmpeg/4.2/download/osx-64/ffmpeg-4.2-h0a44026_0.tar.bz2
369+
tar -xjvf ffmpeg-4.2-h0a44026_0.tar.bz2
370+
for f in lib/*.dylib; do
371+
if [[ $f =~ ([a-z])+\.dylib ]]; then
372+
cp $f ../torchvision
373+
fi
374+
done
375+
else
376+
wget -q https://anaconda.org/pytorch/ffmpeg/4.2/download/linux-64/ffmpeg-4.2-hf484d3e_0.tar.bz2
377+
tar -xjvf ffmpeg-4.2-hf484d3e_0.tar.bz2
378+
cp lib/*.so ../torchvision
379+
cp -r lib/* /usr/lib
380+
cp -r bin/* /usr/bin
381+
cp -r include/* /usr/include
382+
ldconfig
383+
which ffmpeg
384+
fi
385+
fi
386+
cd ..
387+
rm -rf ffmpeg_tmp
388+
}

packaging/torchvision/conda_build_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
channel_sources:
2+
- pytorch-nightly,pytorch,defaults
13
blas_impl:
24
- mkl # [x86_64]
35
c_compiler:

packaging/torchvision/meta.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ requirements:
1010
- {{ compiler('c') }} # [win]
1111
- libpng
1212
- jpeg
13+
- ffmpeg =4.2 # [not win]
1314

1415
host:
1516
- python
@@ -21,6 +22,7 @@ requirements:
2122
run:
2223
- python
2324
- libpng
25+
- ffmpeg =4.2 # [not win]
2426
- jpeg
2527
- pillow >=4.1.1
2628
- numpy >=1.11
@@ -48,7 +50,7 @@ test:
4850
requires:
4951
- pytest
5052
- scipy
51-
- av
53+
- av =8.0.1
5254
- ca-certificates
5355
{{ environ.get('CONDA_TYPING_CONSTRAINT') }}
5456

setup.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,9 @@ def get_extensions():
337337
ffmpeg_bin = os.path.dirname(ffmpeg_exe)
338338
ffmpeg_root = os.path.dirname(ffmpeg_bin)
339339
ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include')
340+
ffmpeg_library_dir = os.path.join(ffmpeg_root, 'lib')
340341
print("ffmpeg include path: {}".format(ffmpeg_include_dir))
342+
print("ffmpeg library_dir: {}".format(ffmpeg_library_dir))
341343

342344
# TorchVision base decoder + video reader
343345
video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader')
@@ -360,16 +362,16 @@ def get_extensions():
360362
ffmpeg_include_dir,
361363
extensions_dir,
362364
],
363-
library_dirs=library_dirs,
365+
library_dirs=[ffmpeg_library_dir] + library_dirs,
364366
libraries=[
365367
'avcodec',
366368
'avformat',
367369
'avutil',
368370
'swresample',
369371
'swscale',
370372
],
371-
extra_compile_args=["-std=c++14"],
372-
extra_link_args=["-std=c++14"],
373+
extra_compile_args=["-std=c++14"] if os.name != 'nt' else ['/std:c++14', '/MP'],
374+
extra_link_args=["-std=c++14" if os.name != 'nt' else '/std:c++14'],
373375
)
374376
)
375377

test/test_datasets_video_utils_opt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from torchvision import set_video_backend
33
import test_datasets_video_utils
44

5-
6-
set_video_backend('video_reader')
5+
# Disabling the video backend switching temporarily
6+
# set_video_backend('video_reader')
77

88

99
if __name__ == '__main__':

test/test_io_opt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import test_io
44

55

6-
set_video_backend('video_reader')
6+
# Disabling the video backend switching temporarily
7+
# set_video_backend('video_reader')
78

89

910
if __name__ == '__main__':

torchvision/csrc/cpu/decoder/decoder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
#include "seekable_buffer.h"
66
#include "stream.h"
77

8+
#if defined(_MSC_VER)
9+
#include <BaseTsd.h>
10+
typedef SSIZE_T ssize_t;
11+
#endif
12+
813
namespace ffmpeg {
914

1015
/**

torchvision/csrc/cpu/decoder/stream.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "util.h"
44

55
namespace ffmpeg {
6+
const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE};
67

78
Stream::Stream(
89
AVFormatContext* inputCtx,
@@ -85,7 +86,7 @@ int Stream::openCodec(std::vector<DecoderMetadata>* metadata) {
8586
header.num = steam->time_base.num;
8687
header.den = steam->time_base.den;
8788
header.duration =
88-
av_rescale_q(steam->duration, steam->time_base, AV_TIME_BASE_Q);
89+
av_rescale_q(steam->duration, steam->time_base, timeBaseQ);
8990
metadata->push_back(header);
9091
}
9192

@@ -238,7 +239,7 @@ void Stream::setFramePts(DecoderHeader* header, bool flush) {
238239
header->pts = av_rescale_q(
239240
header->pts,
240241
inputCtx_->streams[format_.stream]->time_base,
241-
AV_TIME_BASE_Q);
242+
timeBaseQ);
242243
}
243244

244245
switch (format_.type) {

torchvision/csrc/cpu/decoder/subtitle_stream.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "util.h"
55

66
namespace ffmpeg {
7+
const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE};
78

89
SubtitleStream::SubtitleStream(
910
AVFormatContext* inputCtx,
@@ -65,7 +66,7 @@ int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
6566
// set proper pts in us
6667
if (gotFramePtr) {
6768
sub_.pts = av_rescale_q(
68-
pkt.pts, inputCtx_->streams[format_.stream]->time_base, AV_TIME_BASE_Q);
69+
pkt.pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ);
6970
}
7071

7172
return result;

torchvision/csrc/cpu/video_reader/VideoReader.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace video_reader {
2929

3030
const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24;
3131
const AVSampleFormat defaultAudioSampleFormat = AV_SAMPLE_FMT_FLT;
32+
const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE};
3233
const size_t decoderTimeoutMs = 600000;
3334
// A jitter can be added to the end of the range to avoid conversion/rounding
3435
// error, small value 100us won't be enough to select the next frame, but enough
@@ -99,8 +100,8 @@ size_t fillTensor(
99100
for (size_t i = 0; i < msgs.size(); ++i) {
100101
const auto& msg = msgs[i];
101102
// convert pts into original time_base
102-
AVRational avr = {(int)num, (int)den};
103-
framePtsData[i] = av_rescale_q(msg.header.pts, AV_TIME_BASE_Q, avr);
103+
AVRational avr = AVRational{(int)num, (int)den};
104+
framePtsData[i] = av_rescale_q(msg.header.pts, timeBaseQ, avr);
104105
VLOG(2) << "PTS type: " << sizeof(T) << ", us: " << msg.header.pts
105106
<< ", original: " << framePtsData[i];
106107

@@ -156,28 +157,26 @@ void offsetsToUs(
156157
videoEndUs = -1;
157158

158159
if (readVideoStream) {
159-
AVRational vr = {(int)videoTimeBaseNum, (int)videoTimeBaseDen};
160+
AVRational vr = AVRational{(int)videoTimeBaseNum, (int)videoTimeBaseDen};
160161
if (videoStartPts > 0) {
161-
videoStartUs = av_rescale_q(videoStartPts, vr, AV_TIME_BASE_Q);
162+
videoStartUs = av_rescale_q(videoStartPts, vr, timeBaseQ);
162163
}
163164
if (videoEndPts > 0) {
164165
// Add jitter to the end of the range to avoid conversion/rounding error.
165166
// Small value 100us won't be enough to select the next frame, but enough
166167
// to compensate rounding error due to the multiple conversions.
167-
videoEndUs =
168-
timeBaseJitterUs + av_rescale_q(videoEndPts, vr, AV_TIME_BASE_Q);
168+
videoEndUs = timeBaseJitterUs + av_rescale_q(videoEndPts, vr, timeBaseQ);
169169
}
170170
} else if (readAudioStream) {
171-
AVRational ar = {(int)audioTimeBaseNum, (int)audioTimeBaseDen};
171+
AVRational ar = AVRational{(int)audioTimeBaseNum, (int)audioTimeBaseDen};
172172
if (audioStartPts > 0) {
173-
videoStartUs = av_rescale_q(audioStartPts, ar, AV_TIME_BASE_Q);
173+
videoStartUs = av_rescale_q(audioStartPts, ar, timeBaseQ);
174174
}
175175
if (audioEndPts > 0) {
176176
// Add jitter to the end of the range to avoid conversion/rounding error.
177177
// Small value 100us won't be enough to select the next frame, but enough
178178
// to compensate rounding error due to the multiple conversions.
179-
videoEndUs =
180-
timeBaseJitterUs + av_rescale_q(audioEndPts, ar, AV_TIME_BASE_Q);
179+
videoEndUs = timeBaseJitterUs + av_rescale_q(audioEndPts, ar, timeBaseQ);
181180
}
182181
}
183182
}
@@ -336,8 +335,8 @@ torch::List<torch::Tensor> readVideo(
336335

337336
videoDuration = torch::zeros({1}, torch::kLong);
338337
int64_t* videoDurationData = videoDuration.data_ptr<int64_t>();
339-
AVRational vr = {(int)header.num, (int)header.den};
340-
videoDurationData[0] = av_rescale_q(header.duration, AV_TIME_BASE_Q, vr);
338+
AVRational vr = AVRational{(int)header.num, (int)header.den};
339+
videoDurationData[0] = av_rescale_q(header.duration, timeBaseQ, vr);
341340
VLOG(1) << "Video decoding from " << logType << " [" << logMessage
342341
<< "] filled video tensors";
343342
} else {
@@ -398,8 +397,8 @@ torch::List<torch::Tensor> readVideo(
398397

399398
audioDuration = torch::zeros({1}, torch::kLong);
400399
int64_t* audioDurationData = audioDuration.data_ptr<int64_t>();
401-
AVRational ar = {(int)header.num, (int)header.den};
402-
audioDurationData[0] = av_rescale_q(header.duration, AV_TIME_BASE_Q, ar);
400+
AVRational ar = AVRational{(int)header.num, (int)header.den};
401+
audioDurationData[0] = av_rescale_q(header.duration, timeBaseQ, ar);
403402
VLOG(1) << "Video decoding from " << logType << " [" << logMessage
404403
<< "] filled audio tensors";
405404
} else {
@@ -598,8 +597,8 @@ torch::List<torch::Tensor> probeVideo(
598597

599598
videoDuration = torch::zeros({1}, torch::kLong);
600599
int64_t* videoDurationData = videoDuration.data_ptr<int64_t>();
601-
AVRational avr = {(int)header.num, (int)header.den};
602-
videoDurationData[0] = av_rescale_q(header.duration, AV_TIME_BASE_Q, avr);
600+
AVRational avr = AVRational{(int)header.num, (int)header.den};
601+
videoDurationData[0] = av_rescale_q(header.duration, timeBaseQ, avr);
603602

604603
VLOG(2) << "Prob fps: " << header.fps << ", duration: " << header.duration
605604
<< ", num: " << header.num << ", den: " << header.den;
@@ -631,8 +630,8 @@ torch::List<torch::Tensor> probeVideo(
631630

632631
audioDuration = torch::zeros({1}, torch::kLong);
633632
int64_t* audioDurationData = audioDuration.data_ptr<int64_t>();
634-
AVRational avr = {(int)header.num, (int)header.den};
635-
audioDurationData[0] = av_rescale_q(header.duration, AV_TIME_BASE_Q, avr);
633+
AVRational avr = AVRational{(int)header.num, (int)header.den};
634+
audioDurationData[0] = av_rescale_q(header.duration, timeBaseQ, avr);
636635

637636
VLOG(2) << "Prob sample rate: " << format.samples
638637
<< ", duration: " << header.duration << ", num: " << header.num

torchvision/io/_video_opt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _validate_pts(pts_range):
8888
assert (
8989
pts_range[0] <= pts_range[1]
9090
), """Start pts should not be smaller than end pts, got
91-
start pts: %d and end pts: %d""" % (
91+
start pts: {0:d} and end pts: {1:d}""".format(
9292
pts_range[0],
9393
pts_range[1],
9494
)

0 commit comments

Comments
 (0)