fix

mthrok · mthrok · commit c553272f110d · 2023-03-06T15:59:01.000-05:00
diff --git a/test/torchaudio_unittest/io/stream_writer_test.py b/test/torchaudio_unittest/io/stream_writer_test.py
@@ -1,3 +1,5 @@
+import math
+
 import torch
 import torchaudio
 
@@ -420,25 +422,38 @@ def test_audio_pts_increment(self):
             print(chunk.pts, expected)
             assert abs(chunk.pts - expected) < 1e-10
 
-    def test_video_pts_overwrite(self):
+    @parameterized.expand([
+        (10, 100),
+        (15, 150),
+        (24, 240),
+        (25, 200),
+        (30, 300),
+        (50, 500),
+        (60, 600),
+        # PTS value conversion involves float <-> int conversion, which can
+        # introduce rounding error.
+        # This case is a spot-check for the popular 29.97 Hz (30000/1001) frame rate.
+        (30000/1001, 10010),
+    ])
+    def test_video_pts_overwrite(self, frame_rate, num_frames):
         """Can overwrite PTS"""
 
         ext = "mp4"
-        num_frames = 256
         filename = f"test.{ext}"
-        frame_rate = 10
-        width, height = 96, 128
+        width, height = 8, 8
 
         # Write data
         dst = self.get_dst(filename)
         writer = torchaudio.io.StreamWriter(dst=dst, format=ext)
         writer.add_video_stream(frame_rate=frame_rate, width=width, height=height)
 
-        video = torch.randint(256, (num_frames, 3, height, width), dtype=torch.uint8)
-        reference_pts = [2 * i / frame_rate for i in range(num_frames)]
+        video = torch.zeros((1, 3, height, width), dtype=torch.uint8)
+        reference_pts = []
         with writer.open():
-            for i, pts in enumerate(reference_pts):
-                writer.write_video_chunk(0, video[i : i + 1], pts)
+            for i in range(num_frames):
+                pts = i / frame_rate
+                reference_pts.append(pts)
+                writer.write_video_chunk(0, video, pts)
 
         # check
         if self.test_fileobj:
@@ -450,4 +465,7 @@ def test_video_pts_overwrite(self):
         assert len(pts) == len(reference_pts)
 
         for val, ref in zip(pts, reference_pts):
-            assert val == ref
+            # torch provides isclose, but we don't know if converting floats to tensor
+            # could introduce a discrepancy, so we compare floats and use math.isclose
+            # for that.
+            assert math.isclose(val, ref)
diff --git a/torchaudio/csrc/ffmpeg/CMakeLists.txt b/torchaudio/csrc/ffmpeg/CMakeLists.txt
@@ -16,14 +16,10 @@ set(
   stream_reader/sink.cpp
   stream_reader/stream_processor.cpp
   stream_reader/stream_reader.cpp
+  stream_writer/encode_process.cpp
   stream_writer/encoder.cpp
-  stream_writer/converter.cpp
-  stream_writer/output_stream.cpp
-  stream_writer/audio_converter.cpp
-  stream_writer/audio_output_stream.cpp
-  stream_writer/video_converter.cpp
-  stream_writer/video_output_stream.cpp
   stream_writer/stream_writer.cpp
+  stream_writer/tensor_converter.cpp
   compat.cpp
   utils.cpp
   )
diff --git a/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp b/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp
@@ -525,8 +525,8 @@ void EncodeProcess::process(
 
   AVRational codec_tb = codec_ctx->time_base;
   if (pts) {
-    src_frame->pts =
-        static_cast<int64_t>(pts.value() * codec_tb.den / codec_tb.num);
+    double pts_val = pts.value() * codec_tb.den / codec_tb.num;
+    src_frame->pts = static_cast<int64_t>(std::round(pts_val));
   }
   for (const auto& frame : converter.convert(tensor)) {
     process_frame(frame);