
Commit 390e17c

[Storage] Fix retry on large block upload (#17909)
1 parent: 68095e0
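
The user-visible scenario behind the fix: staging a very large block streams the request body through a SubStream wrapper, and a connection error mid-transfer triggers the pipeline's automatic retry, which rewinds and re-sends that body. A minimal sketch of such an upload, assuming typical azure-storage-blob usage (the connection string and file name are placeholders, not from the commit):

from azure.storage.blob import BlobClient

blob = BlobClient.from_connection_string(
    "<connection-string>",              # placeholder
    container_name="bigblobs",
    blob_name="big.dat",
    max_block_size=4000 * 1024 * 1024,  # large blocks are streamed via SubStream
)
with open("big.dat", "rb") as data:
    # Before this fix, a connection error during a block upload followed by an
    # automatic retry could re-read from the wrapped file's stale position and
    # silently send the wrong bytes for the retried block.
    blob.upload_blob(data, overwrite=True, max_concurrency=1)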

File tree

5 files changed: +55 −0


sdk/storage/azure-storage-blob/azure/storage/blob/_shared/uploads.py

Lines changed: 7 additions & 0 deletions

@@ -477,6 +477,13 @@ def read(self, size=None):
                         raise IOError("Stream failed to seek to the desired location.")
                     buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)
             else:
+                absolute_position = self._stream_begin_index + self._position
+                # It's possible that there's a connection problem during data transfer,
+                # so when we retry we don't want to read from the current position of the
+                # wrapped stream; instead we should seek to where we want to read from.
+                if self._wrapped_stream.tell() != absolute_position:
+                    self._wrapped_stream.seek(absolute_position, SEEK_SET)
+
                 buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)

             if buffer_from_stream:
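
Why the seek is needed: on a connection error the transport rewinds the request body by calling seek(0) on the substream, which resets only the substream's logical position; the shared wrapped stream stays wherever the failed read left it, so a retried read() must re-align it. A minimal, self-contained sketch of the failure mode and the fix (WindowedReader is a hypothetical stand-in, not the SDK's SubStream):

from io import SEEK_SET, BytesIO

class WindowedReader:
    """Hypothetical stand-in for SubStream: exposes the byte range
    [begin, begin + length) of a shared wrapped stream."""
    def __init__(self, wrapped, begin, length):
        self._wrapped = wrapped
        self._begin = begin
        self._length = length
        self._position = 0  # logical position within the window

    def seek(self, offset, whence=SEEK_SET):
        # HTTP clients rewind a request body on retry by calling seek(0);
        # note this resets only the window's logical position.
        self._position = offset

    def read(self, size):
        absolute_position = self._begin + self._position
        # The fix: re-align the wrapped stream before reading instead of
        # assuming it still sits where the previous (failed) read left it.
        if self._wrapped.tell() != absolute_position:
            self._wrapped.seek(absolute_position, SEEK_SET)
        data = self._wrapped.read(min(size, self._length - self._position))
        self._position += len(data)
        return data

wrapped = BytesIO(bytes(i % 251 for i in range(2048)))  # aperiodic test data
window = WindowedReader(wrapped, begin=0, length=1024)
first = window.read(1024)   # first attempt streams the whole window
window.seek(0)              # connection dropped: the client rewinds the body
retry = window.read(1024)   # the retry must produce identical bytes
assert first == retry       # fails without the tell()/seek() re-alignment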

sdk/storage/azure-storage-blob/tests/test_largest_block_blob.py

Lines changed: 27 additions & 0 deletions

@@ -22,6 +22,8 @@
 from _shared.testcase import StorageTestCase, GlobalStorageAccountPreparer

 # ------------------------------------------------------------------------------
+from azure.storage.blob._shared.uploads import SubStream
+
 TEST_BLOB_PREFIX = 'largestblob'
 LARGEST_BLOCK_SIZE = 4000 * 1024 * 1024
 LARGEST_SINGLE_UPLOAD_SIZE = 5000 * 1024 * 1024

@@ -207,6 +209,31 @@ def test_create_largest_blob_from_path(self, resource_group, location, storage_a
         # Assert
         self._teardown(FILE_PATH)

+    def test_substream_for_single_thread_upload_large_block(self):
+        FILE_PATH = 'largest_blob_from_path.temp.{}.dat'.format(str(uuid.uuid4()))
+        with open(FILE_PATH, 'wb') as stream:
+            largeStream = LargeStream(LARGEST_BLOCK_SIZE, 100 * 1024 * 1024)
+            chunk = largeStream.read()
+            while chunk:
+                stream.write(chunk)
+                chunk = largeStream.read()
+
+        with open(FILE_PATH, 'rb') as stream:
+            substream = SubStream(stream, 0, 2 * 1024 * 1024, None)
+            # mimic staging a large block: SubStream.read() is called by the http client
+            data1 = substream.read(2 * 1024 * 1024)
+            substream.read(2 * 1024 * 1024)
+            substream.read(2 * 1024 * 1024)
+
+            # mimic rewinding the request body after a connection error
+            substream.seek(0)
+
+            # mimic the retry: stage that large block from the beginning
+            data2 = substream.read(2 * 1024 * 1024)
+
+            self.assertEqual(data1, data2)
+        self._teardown(FILE_PATH)
+
     @pytest.mark.live_test_only
     @GlobalStorageAccountPreparer()
     def test_create_largest_blob_from_path_without_network(self, resource_group, location, storage_account, storage_account_key):
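
The test above leans on LargeStream, a data-generating helper defined elsewhere in this test module and not shown in this diff. A rough sketch of the shape the call site requires (constructor arguments and behavior are inferred from its use above; the real helper's data pattern may differ):

class LargeStream:
    """Hypothetical sketch: serves `length` bytes in chunks of at most
    `chunk_size`, returning b'' once exhausted (so `while chunk:` terminates)."""
    def __init__(self, length, chunk_size):
        self._remaining = length
        self._chunk_size = chunk_size
        self._counter = 0

    def read(self, size=None):
        n = min(self._chunk_size, self._remaining)
        if size is not None:
            n = min(n, size)
        self._remaining -= n
        self._counter += 1
        # Vary the payload per chunk so positional bugs can't pass by accident.
        return bytes([self._counter % 256]) * n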

sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_shared/uploads.py

Lines changed: 7 additions & 0 deletions

@@ -477,6 +477,13 @@ def read(self, size=None):
                         raise IOError("Stream failed to seek to the desired location.")
                     buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)
             else:
+                absolute_position = self._stream_begin_index + self._position
+                # It's possible that there's a connection problem during data transfer,
+                # so when we retry we don't want to read from the current position of the
+                # wrapped stream; instead we should seek to where we want to read from.
+                if self._wrapped_stream.tell() != absolute_position:
+                    self._wrapped_stream.seek(absolute_position, SEEK_SET)
+
                 buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)

             if buffer_from_stream:

sdk/storage/azure-storage-file-share/azure/storage/fileshare/_shared/uploads.py

Lines changed: 7 additions & 0 deletions

@@ -477,6 +477,13 @@ def read(self, size=None):
                         raise IOError("Stream failed to seek to the desired location.")
                     buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)
             else:
+                absolute_position = self._stream_begin_index + self._position
+                # It's possible that there's a connection problem during data transfer,
+                # so when we retry we don't want to read from the current position of the
+                # wrapped stream; instead we should seek to where we want to read from.
+                if self._wrapped_stream.tell() != absolute_position:
+                    self._wrapped_stream.seek(absolute_position, SEEK_SET)
+
                 buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)

             if buffer_from_stream:

sdk/storage/azure-storage-queue/azure/storage/queue/_shared/uploads.py

Lines changed: 7 additions & 0 deletions

@@ -477,6 +477,13 @@ def read(self, size=None):
                         raise IOError("Stream failed to seek to the desired location.")
                     buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)
             else:
+                absolute_position = self._stream_begin_index + self._position
+                # It's possible that there's a connection problem during data transfer,
+                # so when we retry we don't want to read from the current position of the
+                # wrapped stream; instead we should seek to where we want to read from.
+                if self._wrapped_stream.tell() != absolute_position:
+                    self._wrapped_stream.seek(absolute_position, SEEK_SET)
+
                 buffer_from_stream = self._wrapped_stream.read(current_max_buffer_size)

             if buffer_from_stream:
