Skip to content

Commit c76fd58

Browse files
mthrok and facebook-github-bot
authored and committed
Reuse HW device context in GPU encoder (#3215)
Summary: In #3178, a mechanism to cache HW device context was introduced. This commit applies the reuse in StreamWriter, so that when GPU video decoding and encoding are used together, the HW device context is shared between them. This gives back about 250 - 300 MB of GPU memory.

---

Q: What is HW device context?

From https://ffmpeg.org/doxygen/4.1/structAVHWDeviceContext.html#details

> This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e. state that is not tied to a concrete processing configuration. E.g., in an API that supports hardware-accelerated encoding and decoding, this struct will (if possible) wrap the state that is common to both encoding and decoding and from which specific instances of encoders or decoders can be derived.

Pull Request resolved: #3215

Reviewed By: nateanl

Differential Revision: D44504051

Pulled By: mthrok

fbshipit-source-id: 77579cdc8bd9e9b8a218e3f29031d091cda83860
1 parent c07a96a commit c76fd58

File tree

2 files changed

+7
-10
lines changed

2 files changed

+7
-10
lines changed

torchaudio/csrc/ffmpeg/stream_reader/stream_processor.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ void configure_codec_context(
122122
// will retrieve the HW pixel format from opaque pointer.
123123
codec_ctx->get_format = get_hw_format;
124124
codec_ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index()));
125+
TORCH_INTERNAL_ASSERT(
126+
codec_ctx->hw_device_ctx, "Failed to reference HW device context.");
125127
#endif
126128
}
127129
}

torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <torchaudio/csrc/ffmpeg/hw_context.h>
12
#include <torchaudio/csrc/ffmpeg/stream_writer/encode_process.h>
23

34
namespace torchaudio::io {
@@ -460,15 +461,9 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
460461
// context to AVCodecContext. But this way, it will be deallocated
461462
// automatically at the time AVCodecContext is freed, so we do that.
462463

463-
int ret = av_hwdevice_ctx_create(
464-
&ctx->hw_device_ctx,
465-
AV_HWDEVICE_TYPE_CUDA,
466-
std::to_string(device.index()).c_str(),
467-
nullptr,
468-
0);
469-
TORCH_CHECK(
470-
ret >= 0, "Failed to create CUDA device context: ", av_err2string(ret));
471-
assert(ctx->hw_device_ctx);
464+
ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index()));
465+
TORCH_INTERNAL_ASSERT(
466+
ctx->hw_device_ctx, "Failed to reference HW device context.");
472467

473468
ctx->sw_pix_fmt = ctx->pix_fmt;
474469
ctx->pix_fmt = AV_PIX_FMT_CUDA;
@@ -483,7 +478,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
483478
frames_ctx->height = ctx->height;
484479
frames_ctx->initial_pool_size = 5;
485480

486-
ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
481+
int ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
487482
TORCH_CHECK(
488483
ret >= 0,
489484
"Failed to initialize CUDA frame context: ",

0 commit comments

Comments
 (0)