pytorch · scotts · Mar 27, 2025 · Mar 7, 2025 · Mar 14, 2025 · Mar 14, 2025
diff --git a/setup.py b/setup.py
@@ -68,7 +68,7 @@ def run(self):
         super().run()
 
     def build_extension(self, ext):
-        """Call our CMake build system to build libtorchcodec?.so"""
+        """Call our CMake build system to build libtorchcodec*.so"""
         # Setuptools was designed to build one extension (.so file) at a time,
         # calling this method for each Extension object. We're using a
         # CMake-based build where all our extensions are built together at once.
@@ -136,21 +136,22 @@ def copy_extensions_to_source(self):
         This is called by setuptools at the end of .run() during editable installs.
         """
         self.get_finalized_command("build_py")
-        extension = ""
+        extensions = []
         if sys.platform == "linux":
-            extension = "so"
+            extensions = ["so"]
         elif sys.platform == "darwin":
-            extension = "dylib"
+            extensions = ["dylib", "so"]
         else:
             raise NotImplementedError(
                 "Platforms other than linux/darwin are not supported yet"
             )
 
-        for so_file in self._install_prefix.glob(f"*.{extension}"):
-            assert "libtorchcodec" in so_file.name
-            destination = Path("src/torchcodec/") / so_file.name
-            print(f"Copying {so_file} to {destination}")
-            self.copy_file(so_file, destination, level=self.verbose)
+        for ext in extensions:
+            for lib_file in self._install_prefix.glob(f"*.{ext}"):
+                assert "libtorchcodec" in lib_file.name
+                destination = Path("src/torchcodec/") / lib_file.name
+                print(f"Copying {lib_file} to {destination}")
+                self.copy_file(lib_file, destination, level=self.verbose)
 
 
 NOT_A_LICENSE_VIOLATION_VAR = "I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION"

diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.cpp b/src/torchcodec/decoders/_core/AVIOBytesContext.cpp
@@ -0,0 +1,94 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOBytesContext.h"
+#include <torch/types.h>
+#include <cstring>
+
+namespace facebook::torchcodec {
+
+// Wraps a caller-provided, in-memory byte buffer in an AVIOContext. Only the
+// pointer and the size are stored: the bytes are not copied, so the buffer
+// must remain valid for as long as this object is in use.
+AVIOBytesContext::AVIOBytesContext(const void* data, int64_t dataSize)
+    : dataContext_{static_cast<const uint8_t*>(data), dataSize, 0} {
+  TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!");
+  TORCH_CHECK(dataSize > 0, "Video data size must be positive");
+  createAVIOContext(&read, &seek, &dataContext_);
+}
+
+// The signature of this function is defined by FFMPEG.
+//
+// Copies up to buf_size bytes from the current position into buf and advances
+// the position. Returns the number of bytes copied, or AVERROR_EOF when no
+// bytes are left.
+int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) {
+  auto dataContext = static_cast<DataContext*>(opaque);
+  TORCH_CHECK(
+      dataContext->current <= dataContext->size,
+      "Tried to read outside of the buffer: current=",
+      dataContext->current,
+      ", size=",
+      dataContext->size);
+
+  // Clamp in 64 bits: the number of remaining bytes may exceed INT_MAX, and
+  // narrowing it to int before taking the minimum would corrupt the result.
+  int64_t numBytesToRead = FFMIN(
+      static_cast<int64_t>(buf_size),
+      dataContext->size - dataContext->current);
+  TORCH_CHECK(
+      numBytesToRead >= 0,
+      "Tried to read negative bytes: buf_size=",
+      buf_size,
+      ", size=",
+      dataContext->size,
+      ", current=",
+      dataContext->current);
+
+  if (numBytesToRead == 0) {
+    return AVERROR_EOF;
+  }
+  std::memcpy(
+      buf,
+      dataContext->data + dataContext->current,
+      static_cast<size_t>(numBytesToRead));
+  dataContext->current += numBytesToRead;
+  // Safe to narrow: numBytesToRead is in (0, buf_size].
+  return static_cast<int>(numBytesToRead);
+}
+
+// The signature of this function is defined by FFMPEG.
+//
+// Supports AVSEEK_SIZE (returns the buffer size) and SEEK_SET (moves to an
+// absolute position and returns it). An out-of-range SEEK_SET offset yields
+// AVERROR(EINVAL); any other whence value yields -1.
+int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
+  auto dataContext = static_cast<DataContext*>(opaque);
+  int64_t ret = -1;
+
+  switch (whence) {
+    case AVSEEK_SIZE:
+      ret = dataContext->size;
+      break;
+    case SEEK_SET:
+      // Reject positions outside [0, size]. A negative position would make
+      // read() copy from before the start of the buffer, and one past the end
+      // would trip read()'s bounds check.
+      if (offset < 0 || offset > dataContext->size) {
+        ret = AVERROR(EINVAL);
+        break;
+      }
+      dataContext->current = offset;
+      ret = offset;
+      break;
+    default:
+      break;
+  }
+
+  return ret;
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.h b/src/torchcodec/decoders/_core/AVIOBytesContext.h
@@ -0,0 +1,32 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+
+namespace facebook::torchcodec {
+
+// Enables users to pass in the entire video as bytes. Our read and seek
+// functions then traverse the bytes in memory; the bytes are not copied.
+class AVIOBytesContext : public AVIOContextHolder {
+ public:
+  explicit AVIOBytesContext(const void* data, int64_t dataSize);
+
+ private:
+  struct DataContext {
+    const uint8_t* data; // start of the caller's buffer
+    int64_t size; // total number of bytes in the buffer
+    int64_t current; // offset of the next byte to read
+  };
+
+  static int read(void* opaque, uint8_t* buf, int buf_size);
+  static int64_t seek(void* opaque, int64_t offset, int whence);
+
+  DataContext dataContext_; // handed to read/seek as the opaque pointer
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.cpp b/src/torchcodec/decoders/_core/AVIOContextHolder.cpp
@@ -0,0 +1,60 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+#include <torch/types.h>
+
+namespace facebook::torchcodec {
+
+// Allocates the I/O buffer and the AVIOContext that uses it, and wires in the
+// given read and seek callbacks. heldData is handed to avio_alloc_context() as
+// the opaque pointer for those callbacks.
+void AVIOContextHolder::createAVIOContext(
+    AVIOReadFunction read,
+    AVIOSeekFunction seek,
+    void* heldData,
+    int bufferSize) {
+  TORCH_CHECK(
+      bufferSize > 0, "Buffer size must be greater than 0; is ", bufferSize);
+
+  auto ioBuffer = static_cast<uint8_t*>(av_malloc(bufferSize));
+  TORCH_CHECK(
+      ioBuffer != nullptr, "Failed to allocate buffer of size ", bufferSize);
+
+  // The context is read-only: write flag 0 and no write callback.
+  constexpr int writeFlag = 0;
+  AVIOContext* rawContext = avio_alloc_context(
+      ioBuffer,
+      bufferSize,
+      writeFlag,
+      heldData,
+      read,
+      nullptr, // write function; not supported yet
+      seek);
+  avioContext_.reset(rawContext);
+
+  if (!avioContext_) {
+    // No context was created to point at the buffer, so release it here.
+    av_freep(&ioBuffer);
+    TORCH_CHECK(false, "Failed to allocate AVIOContext");
+  }
+}
+
+AVIOContextHolder::~AVIOContextHolder() {
+  // The AVIOContext points to a buffer that must be freed in addition to the
+  // context itself.
+  if (!avioContext_) {
+    return;
+  }
+  av_freep(&avioContext_->buffer);
+}
+
+// Returns a raw pointer to the held AVIOContext.
+AVIOContext* AVIOContextHolder::getAVIOContext() {
+  return avioContext_.get();
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.h b/src/torchcodec/decoders/_core/AVIOContextHolder.h
@@ -0,0 +1,65 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include "src/torchcodec/decoders/_core/FFMPEGCommon.h"
+
+namespace facebook::torchcodec {
+
+// The AVIOContextHolder serves several purposes:
+//
+//   1. It is a smart pointer for the AVIOContext. It has the logic to create
+//      a new AVIOContext and will appropriately free the AVIOContext when it
+//      goes out of scope. Note that this requires more than just having a
+//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
+//      freed.
+//   2. It is a base class for AVIOContext specializations. When specializing
+//      an AVIOContext, we need to provide four things:
+//        1. A read callback function.
+//        2. A seek callback function.
+//        3. A write callback function. (Not supported yet; it's for encoding.)
+//        4. A pointer to some context object that has the same lifetime as the
+//           AVIOContext itself. This context object holds the custom state that
+//           tracks the custom behavior of reading, seeking and writing. It is
+//           provided upon AVIOContext creation and to the read, seek and
+//           write callback functions.
+//      While it's not required, it is natural for the derived classes to make
+//      all of the above members. Derived classes need to call
+//      createAVIOContext(), ideally in their constructor.
+//   3. A generic handle for those that just need to manage having access to
+//      an AVIOContext, but aren't necessarily concerned with how it was
+//      customized: typically, the VideoDecoder.
+class AVIOContextHolder {
+ public:
+  virtual ~AVIOContextHolder();
+  AVIOContext* getAVIOContext();
+
+ protected:
+  // Make constructor protected to prevent anyone from constructing
+  // an AVIOContextHolder without deriving it. (Ordinarily this would be
+  // enforced by having pure virtual methods, but we don't have any.)
+  AVIOContextHolder() = default;
+
+  // These signatures are defined by FFmpeg.
+  using AVIOReadFunction = int (*)(void*, uint8_t*, int);
+  using AVIOSeekFunction = int64_t (*)(void*, int64_t, int);
+
+  // Deriving classes should call this function in their constructor.
+  void createAVIOContext(
+      AVIOReadFunction read,
+      AVIOSeekFunction seek,
+      void* heldData,
+      int bufferSize = defaultBufferSize);
+
+ private:
+  UniqueAVIOContext avioContext_;
+
+  // Defaults to 64 KB
+  static const int defaultBufferSize = 64 * 1024;
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp b/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp
@@ -0,0 +1,82 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/decoders/_core/AVIOFileLikeContext.h"
+#include <torch/types.h>
+#include <cstring>
+
+namespace facebook::torchcodec {
+
+// Keeps its own reference to fileLike, checks that it has read and seek
+// attributes, and creates an AVIOContext whose callbacks forward to them.
+AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike)
+    : fileLike_{UniquePyObject(new py::object(fileLike))} {
+  {
+    // TODO: Is it necessary to acquire the GIL here? Is it maybe even
+    // harmful? At the moment, this is only called from within a pybind
+    // function, and pybind guarantees we have the GIL.
+    py::gil_scoped_acquire gil;
+    TORCH_CHECK(
+        py::hasattr(fileLike, "read"),
+        "File like object must implement a read method.");
+    TORCH_CHECK(
+        py::hasattr(fileLike, "seek"),
+        "File like object must implement a seek method.");
+  }
+  createAVIOContext(&read, &seek, &fileLike_);
+}
+
+// The signature of this function is defined by FFmpeg.
+//
+// Calls the Python object's read() until buf_size bytes have been copied into
+// buf or read() returns no bytes. Returns the number of bytes copied, or
+// AVERROR_EOF if there were none.
+int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
+  auto fileLike = static_cast<UniquePyObject*>(opaque);
+
+  // Note that we acquire the GIL outside of the loop. This is likely more
+  // efficient than releasing and acquiring it each loop iteration.
+  py::gil_scoped_acquire gil;
+  int num_read = 0;
+  while (num_read < buf_size) {
+    int request = buf_size - num_read;
+    auto chunk = static_cast<std::string>(
+        static_cast<py::bytes>((*fileLike)->attr("read")(request)));
+    if (chunk.empty()) {
+      break;
+    }
+    // Validate the length before narrowing it to int: a chunk longer than
+    // INT_MAX would otherwise wrap around and could slip past this check.
+    TORCH_CHECK(
+        chunk.length() <= static_cast<size_t>(request),
+        "Requested up to ",
+        request,
+        " bytes but, received ",
+        chunk.length(),
+        " bytes. The given object does not conform to read protocol of file object.");
+    int chunk_len = static_cast<int>(chunk.length());
+    std::memcpy(buf, chunk.data(), chunk_len);
+    buf += chunk_len;
+    num_read += chunk_len;
+  }
+  return num_read == 0 ? AVERROR_EOF : num_read;
+}
+
+// The signature of this function is defined by FFmpeg.
+//
+// Forwards offset and whence to the Python object's seek() and returns its
+// result. AVSEEK_SIZE is answered with AVERROR(EIO) without calling Python.
+int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) {
+  // We do not know the file size.
+  if (whence == AVSEEK_SIZE) {
+    return AVERROR(EIO);
+  }
+  auto fileLike = static_cast<UniquePyObject*>(opaque);
+  py::gil_scoped_acquire gil;
+  return py::cast<int64_t>((*fileLike)->attr("seek")(offset, whence));
+}
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.h b/src/torchcodec/decoders/_core/AVIOFileLikeContext.h
@@ -0,0 +1,54 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "src/torchcodec/decoders/_core/AVIOContextHolder.h"
+
+namespace py = pybind11;
+
+namespace facebook::torchcodec {
+
+// Enables users to pass in a Python file-like object. We then forward all read
+// and seek calls back up to the methods on the Python object.
+class AVIOFileLikeContext : public AVIOContextHolder {
+ public:
+  explicit AVIOFileLikeContext(py::object fileLike);
+
+ private:
+  static int read(void* opaque, uint8_t* buf, int buf_size);
+  static int64_t seek(void* opaque, int64_t offset, int whence);
+
+  // Note that we dynamically allocate the Python object because we need to
+  // strictly control when its destructor is called. We must hold the GIL
+  // when its destructor gets called, as it needs to update the reference
+  // count. It's easiest to control that when it's dynamic memory. Otherwise,
+  // we'd have to ensure whatever enclosing scope holds the object has the GIL,
+  // and that's, at least, hard. For all of the common pitfalls, see:
+  //
+  //   https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors
+  //
+  // We maintain a reference to the file-like object because the file-like
+  // object that was created on the Python side must live as long as our
+  // potential use. That is, even if there are no more references to the object
+  // on the Python side, we require that the object is still live.
+  struct PyObjectDeleter {
+    inline void operator()(py::object* obj) const {
+      if (obj) {
+        py::gil_scoped_acquire gil; // held while the object is deleted
+        delete obj;
+      }
+    }
+  };
+
+  using UniquePyObject = std::unique_ptr<py::object, PyObjectDeleter>;
+  UniquePyObject fileLike_; // handed to read/seek as the opaque pointer
+};
+
+} // namespace facebook::torchcodec