5 changes: 5 additions & 0 deletions multipy/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
2 changes: 1 addition & 1 deletion multipy/package/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .analyze.is_from_package import is_from_package # noqae
+from .analyze.is_from_package import is_from_package # noqa
 from .file_structure_representation import Directory # noqa
 from .glob_group import GlobGroup # noqa
 from .importer import ( # noqa
1 change: 1 addition & 0 deletions multipy/runtime/CMakeLists.txt
@@ -25,6 +25,7 @@ SET(INTERPRETER_DIR "${DEPLOY_DIR}/interpreter" PARENT_SCOPE)
 
 set(DEPLOY_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 set(PYTORCH_ROOT "${DEPLOY_DIR}/third-party/pytorch")
+get_filename_component(MULTIPY_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
 
 add_subdirectory(interpreter)
 add_subdirectory(third-party/fmt)
25 changes: 18 additions & 7 deletions multipy/runtime/environment.h
@@ -24,25 +24,36 @@ class Environment {
   // all zipped python libraries will be written
   // under this directory
   std::string extraPythonLibrariesDir_;
-  void setupZippedPythonModules(const std::string& pythonAppDir) {
-#ifdef FBCODE_CAFFE2
+  std::string getZippedArchive(
+      const char* zipped_torch_name,
+      const std::string& pythonAppDir) {
     std::string execPath;
     std::ifstream("/proc/self/cmdline") >> execPath;
     ElfFile elfFile(execPath.c_str());
     // load the zipped torch modules
-    constexpr const char* ZIPPED_TORCH_NAME = ".torch_python_modules";
-    auto zippedTorchSection = searchForSection(ZIPPED_TORCH_NAME);
+    auto zippedTorchSection = elfFile.findSection(zipped_torch_name);
     MULTIPY_CHECK(
         zippedTorchSection.has_value(), "Missing the zipped torch section");
     const char* zippedTorchStart = zippedTorchSection->start;
     auto zippedTorchSize = zippedTorchSection->len;
 
-    std::string zipArchive =
-        std::string(pythonAppDir) + "/torch_python_modules.zip";
+    std::string zipArchive = pythonAppDir;
     auto zippedFile = fopen(zipArchive.c_str(), "wb");
     MULTIPY_CHECK(
         zippedFile != nullptr, "Fail to create file: ", strerror(errno));
     fwrite(zippedTorchStart, 1, zippedTorchSize, zippedFile);
     fclose(zippedFile);
+    return zipArchive;
+  }
+  void setupZippedPythonModules(const std::string& pythonAppDir) {
+#ifdef FBCODE_CAFFE2
+    extraPythonPaths_.push_back(getZippedArchive(
+        ".torch_python_modules",
+        std::string(pythonAppDir) + "/torch_python_modules.zip"));
+    extraPythonPaths_.push_back(getZippedArchive(
+        ".multipy_python_modules",
+        std::string(pythonAppDir) + "/multipy_python_modules.zip"));
 
-    extraPythonPaths_.push_back(zipArchive);
 #endif
     extraPythonLibrariesDir_ = pythonAppDir;
   }
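Context for this hunk: the old code inlined the extraction of one hard-coded `.torch_python_modules` ELF section; the new `getZippedArchive` takes the section name and destination path as parameters, so `setupZippedPythonModules` can pull both the torch and the new multipy zip payloads out of the running binary and append them to the interpreter's Python path. A rough Python sketch of the same idea, using binutils' objcopy instead of the in-process `ElfFile` parser (the helper name and the objcopy approach are illustrative, not part of this PR):

```python
# Illustrative only: mimic getZippedArchive() with objcopy rather than
# the C++ ElfFile::findSection() + fwrite() path.
import subprocess
import sys
import zipfile

def extract_zipped_modules(binary: str, section: str, dest_zip: str) -> str:
    # objcopy can dump a named ELF section to a file; /dev/null as the
    # output file keeps the input binary untouched.
    subprocess.run(
        ["objcopy", "--dump-section", f"{section}={dest_zip}", binary, "/dev/null"],
        check=True,
    )
    # Sanity-check that the section payload really is a zip archive.
    with zipfile.ZipFile(dest_zip) as zf:
        zf.testzip()
    # zipimport lets Python import straight from the archive, which is
    # what pushing the path onto extraPythonPaths_ achieves in C++.
    sys.path.append(dest_zip)
    return dest_zip
```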
3 changes: 2 additions & 1 deletion multipy/runtime/interpreter/CMakeLists.txt
@@ -93,7 +93,8 @@ add_custom_command(
   OUTPUT ${FROZEN_FILES}
   WORKING_DIRECTORY ${INTERPRETER_DIR}
   COMMAND mkdir -p ${FROZEN_DIR}
-  COMMAND ${PYTHON_BIN} ${MULTIPY_UTILS}/_freeze.py ${PYTHON_STDLIB_DIR} ${TYPING_PKG} ${PYTORCH_ROOT}/torch --oss --install_dir ${FROZEN_DIR} --verbose
+  COMMAND ${PYTHON_BIN} ${MULTIPY_UTILS}/_freeze.py
+          ${MULTIPY_DIR} ${PYTHON_STDLIB_DIR} ${TYPING_PKG} ${PYTORCH_ROOT}/torch --oss --install_dir ${FROZEN_DIR} --verbose
   DEPENDS cpython typing ${PYTORCH_PYTHON_SOURCE_FILES}
   VERBATIM
 )
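The new `${MULTIPY_DIR}` argument (computed by the `get_filename_component` line added in multipy/runtime/CMakeLists.txt, i.e. the parent of multipy/runtime) points `_freeze.py` at the multipy package itself, so `multipy.utils` and friends get frozen into the interpreter alongside the stdlib, typing, and torch. At its core, freezing compiles each module and embeds the marshaled code object in C data that CPython can import with no filesystem; a minimal sketch of that step (the function name is illustrative, not `_freeze.py`'s actual API):

```python
# Minimal sketch of the freezing step: compile a module to a code
# object and marshal it into the bytes that end up in a C byte array.
import marshal

def freeze_module(py_path: str, module_name: str) -> bytes:
    with open(py_path, "r") as f:
        source = f.read()
    code = compile(source, f"<frozen {module_name}>", "exec")
    # CPython's frozen-module machinery imports exactly these bytes.
    return marshal.dumps(code)
```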
9 changes: 9 additions & 0 deletions multipy/runtime/interpreter/interpreter_impl.cpp
@@ -49,6 +49,7 @@ import _ssl # must come before _hashlib otherwise ssl's locks will be set to a P
 import sys
 import importlib.abc
 import linecache
+from zipfile import ZipFile
 
 class RegisterModuleImporter(importlib.abc.InspectLoader):
     def __init__(self, find_module_source):
@@ -79,7 +80,10 @@ class RegisterModuleImporter(importlib.abc.InspectLoader):
 # print("executable:", sys.executable)
 # print("path:", sys.path)
 # print("prefix:", sys.prefix)
+# print("modules:", sys.modules)
+
 import torch # has to be done serially otherwise things will segfault
+import multipy.utils
 try:
     import torch.version # for some reason torch doesn't import this and cuda fails?
 except ModuleNotFoundError:
@@ -203,10 +207,15 @@ struct __attribute__((visibility("hidden"))) ConcreteInterpreterImpl
 
   // we cache these so we don't have to repeat the conversion of strings into
   // Python and hash table lookups to get to these object
+  saveStorage = global_impl("multipy.utils._deploy", "_save_storages");
+  loadStorage = global_impl("multipy.utils._deploy", "_load_storages");
+  getPackage = global_impl("multipy.utils._deploy", "_get_package");
+  objects = global_impl("multipy.utils._deploy", "_deploy_objects");
+
   saveStorage = global_impl("torch._deploy", "_save_storages");
   loadStorage = global_impl("torch._deploy", "_load_storages");
   getPackage = global_impl("torch._deploy", "_get_package");
   objects = global_impl("torch._deploy", "_deploy_objects");
 
   // Release the GIL that PyInitialize acquires
   PyEval_SaveThread();
 }
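Two things to note in the last hunk: the interpreter startup now also resolves the `_deploy` helpers from the new `multipy.utils._deploy` module (added below in this PR) while keeping the existing `torch._deploy` lookups, and the cached `global_impl` results avoid repeating the string-to-Python conversion and hash-table lookups the comment mentions. In Python terms, `global_impl(module, attr)` is roughly an import plus a cached attribute lookup; a loose analogue, not the C++ API:

```python
# Loose Python analogue of the cached global_impl(module, attr) lookups.
import importlib

_global_cache = {}

def global_impl(module: str, attr: str):
    key = (module, attr)
    if key not in _global_cache:
        # One-time string -> module -> attribute resolution; later calls
        # skip the repeated lookups the C++ comment warns about.
        _global_cache[key] = getattr(importlib.import_module(module), attr)
    return _global_cache[key]

save_storages = global_impl("multipy.utils._deploy", "_save_storages")
```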
5 changes: 5 additions & 0 deletions multipy/utils/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
115 changes: 115 additions & 0 deletions multipy/utils/_deploy.py
@@ -0,0 +1,115 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import io

import multipy.package

import torch
from multipy.package import Importer, OrderedImporter, PackageImporter, sys_importer
from multipy.package._package_pickler import create_pickler
from multipy.package._package_unpickler import PackageUnpickler
from torch.serialization import _maybe_decode_ascii


def _save_storages(importer, obj):
    serialized_storages = []
    serialized_dtypes = []

    importer = (
        importer if isinstance(importer, multipy.package.PackageImporter) else None
    )
    importers: Importer
    if importer is not None:
        importers = OrderedImporter(importer, sys_importer)
    else:
        importers = sys_importer

    def persistent_id(obj):
        if torch.is_storage(obj) or isinstance(obj, torch.storage._TypedStorage):
            if isinstance(obj, torch.storage._TypedStorage):
                # TODO: Once we decide to break serialization FC, we can
                # remove this case
                storage = obj._storage
                dtype = obj.dtype
            else:
                storage = obj
                dtype = torch.uint8

            serialized_storages.append(obj)
            serialized_dtypes.append(dtype)
            return ("storage", len(serialized_storages) - 1)

        if hasattr(obj, "__reduce_deploy__"):
            if _serialized_reduces.get(id(obj)) is None:
                _serialized_reduces[id(obj)] = (
                    "reduce_deploy",
                    id(obj),
                    *obj.__reduce_deploy__(importers),
                )
            return _serialized_reduces[id(obj)]

        return None

    # Write the pickle data for `obj`
    data_buf = io.BytesIO()
    pickler = create_pickler(data_buf, importers)
    pickler.persistent_id = persistent_id
    pickler.dump(obj)
    data_value = data_buf.getvalue()
    return (
        data_value,
        serialized_storages,
        serialized_dtypes,
        importer.zip_reader if importer else None,
    )


def _load_storages(id, zip_reader, obj_bytes, serialized_storages, serialized_dtypes):
    def persistent_load(saved_id):
        assert isinstance(saved_id, tuple)
        typename = _maybe_decode_ascii(saved_id[0])
        data = saved_id[1:]

        if typename == "storage":
            # TODO: Once we decide to break serialization FC, we can
            # stop wrapping with _TypedStorage
            storage = serialized_storages[data[0]]
            dtype = serialized_dtypes[data[0]]
            return torch.storage._TypedStorage(
                wrap_storage=storage._untyped(), dtype=dtype
            )

        if typename == "reduce_deploy":
            reduce_id, func, args = data
            if reduce_id not in _loaded_reduces:
                _loaded_reduces[reduce_id] = func(_raw_packages[zip_reader], *args)
            return _loaded_reduces[reduce_id]

        return None

    importer: Importer
    if zip_reader is not None:
        importer = OrderedImporter(_get_package(zip_reader), sys_importer)
    else:
        importer = sys_importer

    unpickler = PackageUnpickler(importer, io.BytesIO(obj_bytes))
    unpickler.persistent_load = persistent_load  # type: ignore[assignment]
    result = _deploy_objects[id] = unpickler.load()
    return result


def _get_package(zip_reader):
    if zip_reader not in _raw_packages:
        _raw_packages[zip_reader] = PackageImporter(zip_reader)
    return _raw_packages[zip_reader]


_raw_packages: dict = {}
_deploy_objects: dict = {}
_serialized_reduces: dict = {}
_loaded_reduces: dict = {}
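For reviewers who want to poke at the new module: `_save_storages` pickles an object while routing storages (and anything exposing `__reduce_deploy__`) through `persistent_id`, and `_load_storages` reverses the process via `persistent_load`, caching the result in `_deploy_objects`. A hedged round-trip sketch, assuming a torch build that still exposes `torch.storage._TypedStorage` (as the code above does) and no package importer (`importer=None`):

```python
# Round-trip sketch for the new helpers; importer=None keeps everything
# on sys_importer, so no zip_reader is involved.
import torch
from multipy.utils._deploy import _load_storages, _save_storages

obj = {"weights": torch.arange(4.0)}
data, storages, dtypes, zip_reader = _save_storages(None, obj)

# id=0 is just a slot in the module-level _deploy_objects cache.
restored = _load_storages(0, zip_reader, data, storages, dtypes)
assert torch.equal(restored["weights"], obj["weights"])
```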