Skip to content

Add AudioEffector #3163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions docs/source/_templates/autosummary/io_class.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ Methods
not item.startswith('_')
and item not in inherited_members
and item not in attributes
and item != "CodecConfig"
%}

{{ item | underline("~") }}
Expand All @@ -56,11 +55,12 @@ Methods
{%- endif %}


{%- if name == "StreamReader" %}
{%- if name in ["StreamReader", "StreamWriter"] %}

Support Structures
------------------

{%- if name == "StreamReader" %}
{%- for item in [
"ChunkTensor",
"SourceStream",
Expand All @@ -77,15 +77,14 @@ Support Structures
:members:

{%- endfor %}
{%- elif name == "StreamWriter" %}

Support Structures
------------------
{%- elif name == "StreamWriter" %}

CodecConfig
~~~~~~~~~~~

.. autoclass:: torchaudio.io::StreamWriter.CodecConfig()
.. autoclass:: torchaudio.io::CodecConfig
:members:

{%- endif %}
{%- endif %}
1 change: 1 addition & 0 deletions docs/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ torchaudio.io

StreamReader
StreamWriter
AudioEffector
play_audio

.. rubric:: Tutorials using ``torchaudio.io``
Expand Down
16 changes: 16 additions & 0 deletions test/torchaudio_unittest/io/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import torchaudio


# If FFmpeg is 4.1 or older, tests that check the number of output samples
# from OPUS fail. They work on 4.2+.
# Probably this commit fixed it.
# https://github.com/FFmpeg/FFmpeg/commit/18aea7bdd96b320a40573bccabea56afeccdd91c
def lt42(ver=None):
    """Return True when libavcodec is older than the one listed for FFmpeg 4.2.

    Args:
        ver: Optional libavcodec version given as a sequence of ints
            ``(major, minor, ...)``. When omitted, the version is looked up
            via ``torchaudio.utils.ffmpeg_utils.get_versions()["libavcodec"]``.

    Returns:
        bool: True if ``(major, minor)`` sorts before ``(58, 54)``.
    """
    if ver is None:
        ver = torchaudio.utils.ffmpeg_utils.get_versions()["libavcodec"]
    # 5.1 libavcodec 59. 18.100
    # 4.4 libavcodec 58.134.100
    # 4.3 libavcodec 58. 91.100
    # 4.2 libavcodec 58. 54.100
    # 4.1 libavcodec 58. 35.100
    #
    # Compare (major, minor) lexicographically. Testing the two components
    # independently (`major < 59 and minor < 54`) misreports any older major
    # whose minor is 54 or above (e.g. 57.107) as "not older than 4.2".
    return tuple(ver[:2]) < (58, 54)
82 changes: 82 additions & 0 deletions test/torchaudio_unittest/io/effector_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from parameterized import parameterized

from torchaudio.io import AudioEffector
from torchaudio_unittest.common_utils import get_sinusoid, skipIfNoFFmpeg, TorchaudioTestCase

from .common import lt42


@skipIfNoFFmpeg
class EffectorTest(TorchaudioTestCase):
    """Exercise ``AudioEffector.apply`` and ``AudioEffector.stream`` on a 3-channel sinusoid."""

    def test_null(self):
        """With neither an effect nor a format, the waveform comes back unchanged"""
        rate = 8000
        chunk_len = 256

        passthrough = AudioEffector(effect=None, format=None)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        # Whole waveform in a single call.
        self.assertEqual(waveform, passthrough.apply(waveform, rate))

        # Chunk by chunk: every chunk is compared with the slice of the
        # input that starts where the previous chunk ended.
        offset = 0
        for piece in passthrough.stream(waveform, rate, chunk_len):
            self.assertEqual(waveform[offset : offset + chunk_len, :], piece)
            offset += chunk_len

    @parameterized.expand(
        [
            ("ogg", "flac"),  # flac only supports s16 and s32
            ("ogg", "opus"),  # opus only supports 48k Hz
            ("ogg", "vorbis"),  # vorbis only supports stereo
            ("wav", None),
            ("wav", "pcm_u8"),
            ("mp3", None),
        ]
    )
    def test_formats(self, format, encoder):
        """Format/encoder pairs (some with restrictions) pass through the effector and keep the shape"""
        rate = 8000

        codec = AudioEffector(format=format, encoder=encoder)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        encoded = codec.apply(waveform, rate)

        # On FFmpeg 4.1 OPUS produces 8020 samples (20 extra);
        # this has been fixed on 4.2+, so the shape checks are skipped there.
        if encoder == "opus" and lt42():
            return

        self.assertEqual(waveform.shape, encoded.shape)

        # Note
        # MP3 adds padding which cannot be removed when the encoded data is
        # written to a file-like object without a seek method.
        # The amount of padding is retrievable as `AVCodecContext::initial_padding`
        # https://ffmpeg.org/doxygen/4.1/structAVCodecContext.html#a8f95550ce04f236e9915516d04d3d1ab
        # but this is not exposed yet.
        # These "priming" samples have a negative time stamp, so we could also
        # add logic to discard them at decoding; however, as far as I checked,
        # when data is loaded with StreamReader the time stamp is reset.
        # I tried options like avoid_negative_ts,
        # https://ffmpeg.org/ffmpeg-formats.html
        # but it made no difference. Perhaps this is because the information
        # about the negative timestamp is only available on the encoding side,
        # and it presumably is written to the header, but somehow that is not
        # happening with a file-like object.
        # Need to investigate more to remove MP3 padding.
        if format != "mp3":
            for piece in codec.stream(waveform, rate, frames_per_chunk=waveform.size(0)):
                self.assertEqual(waveform.shape, piece.shape)

    @parameterized.expand([("loudnorm=I=-16:LRA=11:TP=-1.5",), ("volume=2",)])
    def test_effect(self, effect):
        """Applying a filter description keeps the shape of the waveform"""
        rate = 8000

        filt = AudioEffector(effect=effect)
        waveform = get_sinusoid(n_channels=3, sample_rate=rate, channels_first=False)

        processed = filt.apply(waveform, rate)
        self.assertEqual(waveform.shape, processed.shape)
26 changes: 9 additions & 17 deletions test/torchaudio_unittest/io/stream_writer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
TempDirMixin,
TorchaudioTestCase,
)
from .common import lt42

if is_ffmpeg_available():
from torchaudio.io import StreamReader, StreamWriter
from torchaudio.io import CodecConfig, StreamReader, StreamWriter


def get_audio_chunk(fmt, sample_rate, num_channels):
Expand Down Expand Up @@ -380,20 +381,11 @@ def test_audio_num_frames_lossy(self, ext, num_channels, sample_rate):
s.process_all_packets()
(saved,) = s.pop_chunks()

# This test fails for OPUS if FFmpeg is 4.1, but it passes for 4.2+
# 4.1 produces 48312 samples (extra 312)
# Probably this commit fixes it.
# https://github.com/FFmpeg/FFmpeg/commit/18aea7bdd96b320a40573bccabea56afeccdd91c
# TODO: issue warning if 4.1?
if ext == "opus":
ver = torchaudio.utils.ffmpeg_utils.get_versions()["libavcodec"]
# 5.1 libavcodec 59. 18.100
# 4.4 libavcodec 58.134.100
# 4.3 libavcodec 58. 91.100
# 4.2 libavcodec 58. 54.100
# 4.1 libavcodec 58. 35.100
if ver[0] < 59 and ver[1] < 54:
return
# On 4.1 OPUS produces 48312 samples (extra 312)
# this has been fixed on 4.2+
# TODO: issue warning if on 4.1?
if ext == "opus" and lt42():
return
self.assertEqual(saved.shape, data.shape)

def test_preserve_fps(self):
Expand Down Expand Up @@ -534,7 +526,7 @@ def test_codec_config(self):
# Write data
dst = self.get_temp_path(filename)
writer = torchaudio.io.StreamWriter(dst=dst, format=ext)
codec_config = torchaudio.io.StreamWriter.CodecConfig(bit_rate=198_000, compression_level=3)
codec_config = CodecConfig(bit_rate=198_000, compression_level=3)
writer.add_audio_stream(sample_rate=sample_rate, num_channels=num_channels, codec_config=codec_config)

audio = torch.zeros((8000, 2))
Expand All @@ -553,7 +545,7 @@ def write_audio(buffer, bit_rate):
writer.add_audio_stream(
sample_rate=sample_rate,
num_channels=num_channels,
codec_config=torchaudio.io.StreamWriter.CodecConfig(bit_rate=bit_rate),
codec_config=CodecConfig(bit_rate=bit_rate),
)

with writer.open():
Expand Down
5 changes: 4 additions & 1 deletion torchaudio/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from ._effector import AudioEffector
from ._playback import play_audio
from ._stream_reader import StreamReader
from ._stream_writer import StreamWriter
from ._stream_writer import CodecConfig, StreamWriter


__all__ = [
"AudioEffector",
"StreamReader",
"StreamWriter",
"CodecConfig",
"play_audio",
]
Loading