diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py index 3db10183f68..c4b91b4a14b 100644 --- a/torchvision/prototype/datasets/utils/_internal.py +++ b/torchvision/prototype/datasets/utils/_internal.py @@ -8,6 +8,7 @@ import os.path import pathlib import pickle +import platform from typing import BinaryIO from typing import ( Sequence, @@ -260,6 +261,11 @@ def _make_sharded_datapipe(root: str, dataset_size: int) -> IterDataPipe: return dp +def _read_mutable_buffer_fallback(file: BinaryIO, count: int, item_size: int) -> bytearray: + # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable + return bytearray(file.read(-1 if count == -1 else count * item_size)) + + def fromfile( file: BinaryIO, *, @@ -293,20 +299,24 @@ def fromfile( item_size = (torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits // 8 np_dtype = byte_order + char + str(item_size) - # PyTorch does not support tensors with underlying read-only memory. In case - # - the file has a .fileno(), - # - the file was opened for updating, i.e. 'r+b' or 'w+b', - # - the file is seekable - # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it to - # a mutable location afterwards. buffer: Union[memoryview, bytearray] - try: - buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] - # Reading from the memoryview does not advance the file cursor, so we have to do it manually. - file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR)) - except (PermissionError, io.UnsupportedOperation): - # A plain file.read() will give a read-only bytes, so we convert it to bytearray to make it mutable - buffer = bytearray(file.read(-1 if count == -1 else count * item_size)) + if platform.system() != "Windows": + # PyTorch does not support tensors with underlying read-only memory. In case + # - the file has a .fileno(), + # - the file was opened for updating, i.e. 'r+b' or 'w+b', + # - the file is seekable + # we can avoid copying the data for performance. Otherwise we fall back to simply .read() the data and copy it + # to a mutable location afterwards. + try: + buffer = memoryview(mmap.mmap(file.fileno(), 0))[file.tell() :] + # Reading from the memoryview does not advance the file cursor, so we have to do it manually. + file.seek(*(0, io.SEEK_END) if count == -1 else (count * item_size, io.SEEK_CUR)) + except (PermissionError, io.UnsupportedOperation): + buffer = _read_mutable_buffer_fallback(file, count, item_size) + else: + # On Windows just trying to call mmap.mmap() on a file that does not support it, may corrupt the internal state + # so no data can be read afterwards. Thus, we simply ignore the possible speed-up. + buffer = _read_mutable_buffer_fallback(file, count, item_size) # We cannot use torch.frombuffer() directly, since it only supports the native byte order of the system. Thus, we # read the data with np.frombuffer() with the correct byte order and convert it to the native one with the