Skip to content

Commit 6c018c8

Browse files
committed
wip
1 parent 9133f2a commit 6c018c8

File tree

12 files changed

+74
-21
lines changed

12 files changed

+74
-21
lines changed

torchaudio/csrc/ffmpeg/chunk.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#pragma once
2+
3+
#include<torch/types.h>
4+
5+
namespace torchaudio::io {
6+
7+
/// Stores decoded frames and metadata
8+
struct Chunk {
9+
/// Audio/video frames.
10+
///
11+
/// For audio, the shape is ``[time, num_channels]``, and the ``dtype``
12+
/// depends on output stream configurations.
13+
///
14+
/// For video, the shape is ``[time, channel, height, width]``, and
15+
/// the ``dtype`` is ``torch.uint8``.
16+
torch::Tensor frames;
17+
///
18+
/// Presentation time stamp of the first frame, in seconds.
19+
double pts;
20+
};
21+
22+
} // namespace torchaudio::io

torchaudio/csrc/ffmpeg/pybind/pybind.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <torch/extension.h>
2+
#include <torchaudio/csrc/ffmpeg/chunk.h>
23
#include <torchaudio/csrc/ffmpeg/pybind/fileobj.h>
34
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h>
45
#include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h>
@@ -40,8 +41,18 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
4041
.def("add_video_stream", &StreamWriter::add_video_stream)
4142
.def("dump_format", &StreamWriter::dump_format)
4243
.def("open", &StreamWriter::open)
43-
.def("write_audio_chunk", &StreamWriter::write_audio_chunk)
44-
.def("write_video_chunk", &StreamWriter::write_video_chunk)
44+
.def("write_audio_chunk",
45+
py::overload_cast<int, const torch::Tensor&>(
46+
&StreamWriter::write_audio_chunk))
47+
.def("write_audio_chunk",
48+
py::overload_cast<int, const Chunk&>(
49+
&StreamWriter::write_audio_chunk))
50+
.def("write_video_chunk",
51+
py::overload_cast<int, const torch::Tensor&>(
52+
&StreamWriter::write_video_chunk))
53+
.def("write_video_chunk",
54+
py::overload_cast<int, const Chunk&>(
55+
&StreamWriter::write_video_chunk))
4556
.def("flush", &StreamWriter::flush)
4657
.def("close", &StreamWriter::close);
4758
py::class_<StreamWriterFileObj>(m, "StreamWriterFileObj", py::module_local())
@@ -51,8 +62,18 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
5162
.def("add_video_stream", &StreamWriterFileObj::add_video_stream)
5263
.def("dump_format", &StreamWriterFileObj::dump_format)
5364
.def("open", &StreamWriterFileObj::open)
54-
.def("write_audio_chunk", &StreamWriterFileObj::write_audio_chunk)
55-
.def("write_video_chunk", &StreamWriterFileObj::write_video_chunk)
65+
.def("write_audio_chunk",
66+
py::overload_cast<int, const torch::Tensor&>(
67+
&StreamWriterFileObj::write_audio_chunk))
68+
.def("write_audio_chunk",
69+
py::overload_cast<int, const Chunk&>(
70+
&StreamWriterFileObj::write_audio_chunk))
71+
.def("write_video_chunk",
72+
py::overload_cast<int, const torch::Tensor&>(
73+
&StreamWriterFileObj::write_video_chunk))
74+
.def("write_video_chunk",
75+
py::overload_cast<int, const Chunk&>(
76+
&StreamWriterFileObj::write_video_chunk))
5677
.def("flush", &StreamWriterFileObj::flush)
5778
.def("close", &StreamWriterFileObj::close);
5879
py::class_<OutputStreamInfo>(m, "OutputStreamInfo", py::module_local())

torchaudio/csrc/ffmpeg/stream_reader/buffer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22
#include <torch/torch.h>
3+
#include <torchaudio/csrc/ffmpeg/chunk.h>
34
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
45
#include <torchaudio/csrc/ffmpeg/stream_reader/typedefs.h>
56

torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
#include <torchaudio/csrc/ffmpeg/chunk.h>
23
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
34
#include <torchaudio/csrc/ffmpeg/stream_reader/decoder.h>
45
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_processor.h>

torchaudio/csrc/ffmpeg/stream_reader/typedefs.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,20 +106,5 @@ struct OutputStreamInfo {
106106
std::string filter_description;
107107
};
108108

109-
/// Stores decoded frames and metadata
110-
struct Chunk {
111-
/// Audio/video frames.
112-
///
113-
/// For audio, the shape is ``[time, num_channels]``, and the ``dtype``
114-
/// depends on output stream configurations.
115-
///
116-
/// For video, the shape is ``[time, channel, height, width]``, and
117-
/// the ``dtype`` is ``torch.uint8``.
118-
torch::Tensor frames;
119-
///
120-
/// Presentation time stamp of the first frame, in seconds.
121-
double pts;
122-
};
123-
124109
} // namespace io
125110
} // namespace torchaudio

torchaudio/csrc/ffmpeg/stream_writer/audio_output_stream.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ AudioOutputStream::AudioOutputStream(
4242
converter(src_fmt, codec_ctx_),
4343
codec_ctx(std::move(codec_ctx_)) {}
4444

45+
void AudioOutputStream::write_chunk(const Chunk& chunk) {
46+
write_chunk(chunk.frames);
47+
}
48+
4549
void AudioOutputStream::write_chunk(const torch::Tensor& waveform) {
4650
AVRational time_base{1, codec_ctx->sample_rate};
4751
for (const auto& frame : converter.convert(waveform)) {

torchaudio/csrc/ffmpeg/stream_writer/audio_output_stream.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct AudioOutputStream : OutputStream {
1515
AVCodecContextPtr&& codec_ctx);
1616

1717
void write_chunk(const torch::Tensor& waveform) override;
18+
void write_chunk(const Chunk& chunk) override;
1819
~AudioOutputStream() override = default;
1920
};
2021

torchaudio/csrc/ffmpeg/stream_writer/output_stream.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <torch/types.h>
4+
#include <torchaudio/csrc/ffmpeg/chunk.h>
45
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
56
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
67
#include <torchaudio/csrc/ffmpeg/stream_writer/encoder.h>
@@ -23,6 +24,7 @@ struct OutputStream {
2324
FilterGraph&& filter);
2425

2526
virtual void write_chunk(const torch::Tensor& input) = 0;
27+
virtual void write_chunk(const Chunk& chunk) = 0;
2628
void process_frame(AVFrame* src);
2729
void flush();
2830
virtual ~OutputStream() = default;

torchaudio/csrc/ffmpeg/stream_writer/stream_writer.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,11 +587,19 @@ void StreamWriter::write_audio_chunk(int i, const torch::Tensor& waveform) {
587587
streams[i]->write_chunk(waveform);
588588
}
589589

590+
void StreamWriter::write_audio_chunk(int i, const Chunk& chunk) {
591+
write_audio_chunk(i, chunk.frames);
592+
}
593+
590594
void StreamWriter::write_video_chunk(int i, const torch::Tensor& frames) {
591595
validate_stream(i, AVMEDIA_TYPE_VIDEO);
592596
streams[i]->write_chunk(frames);
593597
}
594598

599+
void StreamWriter::write_video_chunk(int i, const Chunk& chunk) {
600+
write_video_chunk(i, chunk.frames);
601+
}
602+
595603
void StreamWriter::flush() {
596604
for (auto& os : streams) {
597605
os->flush();

torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <torch/torch.h>
4+
#include <torchaudio/csrc/ffmpeg/chunk.h>
45
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
56
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
67
#include <torchaudio/csrc/ffmpeg/stream_writer/output_stream.h>
@@ -161,14 +162,16 @@ class StreamWriter {
161162
/// @param i Stream index.
162163
/// @param frames Waveform tensor. Shape: ``(frame, channel)``.
163164
/// The ``dtype`` must match what was passed to ``add_audio_stream()`` method.
164-
void write_audio_chunk(int i, const torch::Tensor& chunk);
165+
void write_audio_chunk(int i, const torch::Tensor& frames);
166+
void write_audio_chunk(int i, const Chunk& chunk);
165167
/// Write video data
166168
/// @param i Stream index.
167169
/// @param frames Video/image tensor. Shape: ``(time, channel, height,
168170
/// width)``. The ``dtype`` must be ``torch.uint8``. The shape ``(height,
169171
/// width and the number of channels)`` must match what was configured when
170172
/// calling ``add_video_stream()``.
171-
void write_video_chunk(int i, const torch::Tensor& chunk);
173+
void write_video_chunk(int i, const torch::Tensor& frames);
174+
void write_video_chunk(int i, const Chunk& chunk);
172175
/// Flush the frames from encoders and write the frames to the destination.
173176
void flush();
174177

torchaudio/csrc/ffmpeg/stream_writer/video_output_stream.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ VideoOutputStream::VideoOutputStream(
5050
hw_frame_ctx(std::move(hw_frame_ctx_)),
5151
codec_ctx(std::move(codec_ctx_)) {}
5252

53+
void VideoOutputStream::write_chunk(const Chunk& chunk) {
54+
write_chunk(chunk.frames);
55+
}
56+
5357
void VideoOutputStream::write_chunk(const torch::Tensor& frames) {
5458
for (const auto& frame : converter.convert(frames)) {
5559
process_frame(frame);

torchaudio/csrc/ffmpeg/stream_writer/video_output_stream.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ struct VideoOutputStream : OutputStream {
1818
AVBufferRefPtr&& hw_frame_ctx);
1919

2020
void write_chunk(const torch::Tensor& frames) override;
21+
void write_chunk(const Chunk& chunk) override;
2122

2223
~VideoOutputStream() override = default;
2324
};

0 commit comments

Comments
 (0)