From 46676705833695b724db3628ac6d75104385ed8f Mon Sep 17 00:00:00 2001 From: Kevin Tse Date: Fri, 25 Mar 2022 11:30:38 -0400 Subject: [PATCH] Updating TorchData DataPipe API usages --- torchtext/datasets/amazonreviewfull.py | 2 +- torchtext/datasets/amazonreviewpolarity.py | 2 +- torchtext/datasets/cc100.py | 2 +- torchtext/datasets/dbpedia.py | 2 +- torchtext/datasets/enwik9.py | 2 +- torchtext/datasets/imdb.py | 2 +- torchtext/datasets/iwslt2016.py | 4 ++-- torchtext/datasets/iwslt2017.py | 4 ++-- torchtext/datasets/multi30k.py | 4 ++-- torchtext/datasets/sogounews.py | 2 +- torchtext/datasets/sst2.py | 2 +- torchtext/datasets/udpos.py | 2 +- torchtext/datasets/wikitext103.py | 2 +- torchtext/datasets/wikitext2.py | 2 +- torchtext/datasets/yahooanswers.py | 2 +- torchtext/datasets/yelpreviewfull.py | 2 +- torchtext/datasets/yelpreviewpolarity.py | 2 +- 17 files changed, 20 insertions(+), 20 deletions(-) diff --git a/torchtext/datasets/amazonreviewfull.py b/torchtext/datasets/amazonreviewfull.py index 6313d60817..d490ba7463 100644 --- a/torchtext/datasets/amazonreviewfull.py +++ b/torchtext/datasets/amazonreviewfull.py @@ -70,7 +70,7 @@ def AmazonReviewFull(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/amazonreviewpolarity.py b/torchtext/datasets/amazonreviewpolarity.py index 0a2c6f28fa..24b7278743 100644 --- a/torchtext/datasets/amazonreviewpolarity.py +++ b/torchtext/datasets/amazonreviewpolarity.py @@ -67,7 +67,7 @@ def AmazonReviewPolarity(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/cc100.py b/torchtext/datasets/cc100.py index 5477fe4ae2..dc4d3af338 100644 --- a/torchtext/datasets/cc100.py +++ b/torchtext/datasets/cc100.py @@ -161,7 +161,7 @@ def CC100(root: str, language_code: str = "en"): cache_decompressed_dp = cache_compressed_dp.on_disk_cache( filepath_fn=lambda x: os.path.join(root, os.path.basename(x).rstrip(".xz")) ) - cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").read_from_xz() + cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").load_from_xz() cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb") data_dp = FileOpener(cache_decompressed_dp, encoding="utf-8").readlines(return_path=False) diff --git a/torchtext/datasets/dbpedia.py b/torchtext/datasets/dbpedia.py index 88cf31033b..6ea4b64953 100644 --- a/torchtext/datasets/dbpedia.py +++ b/torchtext/datasets/dbpedia.py @@ -66,7 +66,7 @@ def DBpedia(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/enwik9.py b/torchtext/datasets/enwik9.py index f5a5de8ed9..0940b77760 100644 --- a/torchtext/datasets/enwik9.py +++ b/torchtext/datasets/enwik9.py @@ -48,7 +48,7 @@ def EnWik9(root: str): cache_decompressed_dp = cache_compressed_dp.on_disk_cache( filepath_fn=lambda x: os.path.join(root, os.path.splitext(_PATH)[0]) ) - cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").read_from_zip() + cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").load_from_zip() cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) data_dp = FileOpener(cache_decompressed_dp, encoding="utf-8") diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 045b0e5608..80de7134ed 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -62,7 +62,7 @@ def IMDB(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: [os.path.join(root, decompressed_folder, split, label) for label in labels] ) cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b") - cache_decompressed_dp = cache_decompressed_dp.read_from_tar() + cache_decompressed_dp = cache_decompressed_dp.load_from_tar() def filter_imdb_data(key, fname): # eg. fname = "aclImdb/train/neg/12416_3.txt" diff --git a/torchtext/datasets/iwslt2016.py b/torchtext/datasets/iwslt2016.py index f6b4eb51ff..a79c189613 100644 --- a/torchtext/datasets/iwslt2016.py +++ b/torchtext/datasets/iwslt2016.py @@ -125,7 +125,7 @@ # avoid additional conditional imports. def _filter_clean_cache(cache_decompressed_dp, full_filepath, uncleaned_filename): cache_inner_decompressed_dp = cache_decompressed_dp.on_disk_cache(filepath_fn=lambda x: full_filepath) - cache_inner_decompressed_dp = FileOpener(cache_inner_decompressed_dp, mode="b").read_from_tar() + cache_inner_decompressed_dp = FileOpener(cache_inner_decompressed_dp, mode="b").load_from_tar() cache_inner_decompressed_dp = cache_inner_decompressed_dp.filter( lambda x: os.path.basename(uncleaned_filename) in x[0] ) @@ -263,7 +263,7 @@ def IWSLT2016( cache_decompressed_dp = cache_compressed_dp.on_disk_cache(filepath_fn=lambda x: inner_iwslt_tar) cache_decompressed_dp = ( FileOpener(cache_decompressed_dp, mode="b") - .read_from_tar() + .load_from_tar() .filter(lambda x: os.path.basename(inner_iwslt_tar) in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/iwslt2017.py b/torchtext/datasets/iwslt2017.py index 04aad5df24..e97ce9fbf5 100644 --- a/torchtext/datasets/iwslt2017.py +++ b/torchtext/datasets/iwslt2017.py @@ -104,7 +104,7 @@ # avoid additional conditional imports. def _filter_clean_cache(cache_decompressed_dp, full_filepath, uncleaned_filename): cache_inner_decompressed_dp = cache_decompressed_dp.on_disk_cache(filepath_fn=lambda x: full_filepath) - cache_inner_decompressed_dp = FileOpener(cache_inner_decompressed_dp, mode="b").read_from_tar() + cache_inner_decompressed_dp = FileOpener(cache_inner_decompressed_dp, mode="b").load_from_tar() cache_inner_decompressed_dp = cache_inner_decompressed_dp.filter( lambda x: os.path.basename(uncleaned_filename) in x[0] ) @@ -208,7 +208,7 @@ def IWSLT2017(root=".data", split=("train", "valid", "test"), language_pair=("de ) cache_decompressed_dp = cache_compressed_dp.on_disk_cache(filepath_fn=lambda x: inner_iwslt_tar) - cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").read_from_tar() + cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b").load_from_tar() cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) src_filename = file_path_by_lang_and_split[src_language][split] diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index 15f61203ab..8382fdc57c 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -84,7 +84,7 @@ def Multi30k(root: str, split: Union[Tuple[str], str], language_pair: Tuple[str] ) src_cache_decompressed_dp = ( FileOpener(src_cache_decompressed_dp, mode="b") - .read_from_tar() + .load_from_tar() .filter(lambda x: f"{_PREFIX[split]}.{language_pair[0]}" in x[0]) ) src_cache_decompressed_dp = src_cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) @@ -94,7 +94,7 @@ def Multi30k(root: str, split: Union[Tuple[str], str], language_pair: Tuple[str] ) tgt_cache_decompressed_dp = ( FileOpener(tgt_cache_decompressed_dp, mode="b") - .read_from_tar() + .load_from_tar() .filter(lambda x: f"{_PREFIX[split]}.{language_pair[1]}" in x[0]) ) tgt_cache_decompressed_dp = tgt_cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/sogounews.py b/torchtext/datasets/sogounews.py index c673519f06..8f023971ec 100644 --- a/torchtext/datasets/sogounews.py +++ b/torchtext/datasets/sogounews.py @@ -70,7 +70,7 @@ def SogouNews(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/sst2.py b/torchtext/datasets/sst2.py index 1f072cddf9..23409f0a20 100644 --- a/torchtext/datasets/sst2.py +++ b/torchtext/datasets/sst2.py @@ -73,7 +73,7 @@ def SST2(root, split): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/udpos.py b/torchtext/datasets/udpos.py index 8410b76886..b9f0328690 100644 --- a/torchtext/datasets/udpos.py +++ b/torchtext/datasets/udpos.py @@ -61,7 +61,7 @@ def UDPOS(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/wikitext103.py b/torchtext/datasets/wikitext103.py index 2e6d037bb7..6f437fa7c6 100644 --- a/torchtext/datasets/wikitext103.py +++ b/torchtext/datasets/wikitext103.py @@ -66,7 +66,7 @@ def WikiText103(root: str, split: Union[Tuple[str], str]): ) # Extract zip and filter the appropriate split file cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) data_dp = FileOpener(cache_decompressed_dp, encoding="utf-8") diff --git a/torchtext/datasets/wikitext2.py b/torchtext/datasets/wikitext2.py index c8663fcde2..edbd8faac2 100644 --- a/torchtext/datasets/wikitext2.py +++ b/torchtext/datasets/wikitext2.py @@ -66,7 +66,7 @@ def WikiText2(root: str, split: Union[Tuple[str], str]): ) # Extract zip and filter the appropriate split file cache_decompressed_dp = ( - FileOpener(cache_decompressed_dp, mode="b").read_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + FileOpener(cache_decompressed_dp, mode="b").load_from_zip().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) ) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) data_dp = FileOpener(cache_decompressed_dp, encoding="utf-8") diff --git a/torchtext/datasets/yahooanswers.py b/torchtext/datasets/yahooanswers.py index 9553c11bdf..dae6a86f2f 100644 --- a/torchtext/datasets/yahooanswers.py +++ b/torchtext/datasets/yahooanswers.py @@ -67,7 +67,7 @@ def YahooAnswers(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b") - cache_decompressed_dp = cache_decompressed_dp.read_from_tar() + cache_decompressed_dp = cache_decompressed_dp.load_from_tar() cache_decompressed_dp = cache_decompressed_dp.filter(lambda x: _EXTRACTED_FILES[split] in x[0]) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) diff --git a/torchtext/datasets/yelpreviewfull.py b/torchtext/datasets/yelpreviewfull.py index 1056dcaf05..54706e6222 100644 --- a/torchtext/datasets/yelpreviewfull.py +++ b/torchtext/datasets/yelpreviewfull.py @@ -67,7 +67,7 @@ def YelpReviewFull(root: str, split: Union[Tuple[str], str]): filepath_fn=lambda x: os.path.join(root, _EXTRACTED_FILES[split]) ) cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b") - cache_decompressed_dp = cache_decompressed_dp.read_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) + cache_decompressed_dp = cache_decompressed_dp.load_from_tar().filter(lambda x: _EXTRACTED_FILES[split] in x[0]) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True) data_dp = FileOpener(cache_decompressed_dp, encoding="utf-8") diff --git a/torchtext/datasets/yelpreviewpolarity.py b/torchtext/datasets/yelpreviewpolarity.py index 7c67374fd5..9efc1084b2 100644 --- a/torchtext/datasets/yelpreviewpolarity.py +++ b/torchtext/datasets/yelpreviewpolarity.py @@ -67,7 +67,7 @@ def YelpReviewPolarity(root: str, split: Union[Tuple[str], str]): ) cache_decompressed_dp = FileOpener(cache_decompressed_dp, mode="b") - cache_decompressed_dp = cache_decompressed_dp.read_from_tar() + cache_decompressed_dp = cache_decompressed_dp.load_from_tar() cache_decompressed_dp = cache_decompressed_dp.filter(lambda x: _EXTRACTED_FILES[split] in x[0]) cache_decompressed_dp = cache_decompressed_dp.end_caching(mode="wb", same_filepath_fn=True)