Skip to content

Refactor conda backup script #1350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 13, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions s3_management/backup_conda.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#!/usr/bin/env python3
# Downloads domain library packages from channel
# Downloads domain pytorch and library packages from channel
# And backs them up to S3
# Do not use unless you know what you are doing
# Usage: python backup_conda.py --version 1.6.0

import conda.api
import boto3
from typing import List, Optional
import urllib
import os
import hashlib
import argparse

S3 = boto3.resource('s3')
BUCKET = S3.Bucket('pytorch-backup')
Expand All @@ -23,11 +25,13 @@ def compute_md5(path:str) -> str:
def download_conda_package(package:str, version:Optional[str] = None, depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]:
packages = conda.api.SubdirData.query_all(package, channels = [channel] if channel is not None else None, subdirs = _known_subdirs)
rc = []

for pkg in packages:
if version is not None and pkg.version != version:
continue
if depends is not None and depends not in pkg.depends:
continue

print(f"Downloading {pkg.url}...")
os.makedirs(pkg.subdir, exist_ok = True)
fname = f"{pkg.subdir}/{pkg.fn}"
Expand All @@ -50,6 +54,18 @@ def upload_to_s3(prefix: str, fnames: List[str]) -> None:


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--version",
help="PyTorch Version to backup",
type=str,
required = True
)
options = parser.parse_args()
rc = download_conda_package("pytorch", channel = "pytorch", version = options.version)
upload_to_s3(f"v{options.version}/conda", rc)

for libname in ["torchvision", "torchaudio", "torchtext"]:
rc = download_conda_package(libname, channel = "pytorch", depends = "pytorch 1.9.0")
upload_to_s3("v1.9.0-rc4/conda", rc)
print(f"processing {libname}")
rc = download_conda_package(libname, channel = "pytorch", depends = f"pytorch {options.version}")
upload_to_s3f(f"v{options.version}/conda", rc)