1
1
import random
2
- from typing import Any , Dict , Iterator , BinaryIO , Optional , Tuple
2
+ from typing import Any , Dict , Iterator , Optional , Tuple
3
3
4
4
import av
5
5
import numpy as np
@@ -15,7 +15,7 @@ def __init__(self, datapipe: IterDataPipe, *, inline: bool = True) -> None:
15
15
self .datapipe = datapipe
16
16
self ._inline = inline
17
17
18
- def _decode (self , buffer : BinaryIO , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
18
+ def _decode (self , buffer : ReadOnlyTensorBuffer , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
19
19
raise NotImplementedError
20
20
21
21
def _find_encoded_video (self , id : Tuple [Any , ...], obj : Any ) -> Optional [Tuple [Any , ...]]:
@@ -65,13 +65,12 @@ def __iter__(self) -> Iterator[Any]:
65
65
raise ValueError ("more than one encoded video" )
66
66
id , video = ids_and_videos [0 ]
67
67
68
- buffer = ReadOnlyTensorBuffer (video )
69
- for data in self ._decode (buffer , video .meta .copy ()):
68
+ for data in self ._decode (ReadOnlyTensorBuffer (video ), video .meta .copy ()):
70
69
yield self ._integrate_data (sample , id , data )
71
70
72
71
73
72
class KeyframeDecoder (_VideoDecoder ):
74
- def _decode (self , buffer : BinaryIO , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
73
+ def _decode (self , buffer : ReadOnlyTensorBuffer , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
75
74
with av .open (buffer , metadata_errors = "ignore" ) as container :
76
75
stream = container .streams .video [0 ]
77
76
stream .codec_context .skip_frame = "NONKEY"
@@ -92,7 +91,7 @@ def __init__(self, datapipe: IterDataPipe, *, num_samples: int = 1, inline: bool
92
91
super ().__init__ (datapipe , inline = inline )
93
92
self .num_sampler = num_samples
94
93
95
- def _decode (self , buffer : BinaryIO , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
94
+ def _decode (self , buffer : ReadOnlyTensorBuffer , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
96
95
with av .open (buffer , metadata_errors = "ignore" ) as container :
97
96
stream = container .streams .video [0 ]
98
97
# duration is given in time_base units as int
@@ -147,7 +146,7 @@ def _unfold(self, tensor: torch.Tensor, dilation: int = 1) -> torch.Tensor:
147
146
new_size = (0 , self .num_frames_per_clip )
148
147
return torch .as_strided (tensor , new_size , new_stride )
149
148
150
- def _decode (self , buffer : BinaryIO , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
149
+ def _decode (self , buffer : ReadOnlyTensorBuffer , meta : Dict [str , Any ]) -> Iterator [Dict [str , Any ]]:
151
150
with av .open (buffer , metadata_errors = "ignore" ) as container :
152
151
stream = container .streams .video [0 ]
153
152
time_base = stream .time_base
0 commit comments