@@ -255,13 +255,16 @@ class MiniCPMVImageEmbeddingItems(DictEmbeddingItems):
255
255
def __init__(
    self,
    data: Mapping[str, torch.Tensor],
    fields_factory: Callable[
        [Mapping[str, torch.Tensor]],
        Mapping[str, MultiModalFieldConfig],
    ],
) -> None:
    """Initialize image embedding items from a mapping of tensors.

    Args:
        data: Mapping from field name to tensor. The parent constructor
            is told that ``"image_embeds"`` and ``"image_sizes"`` are
            required fields.
        fields_factory: Callable that takes the tensor mapping and
            returns a mapping from field name to its
            ``MultiModalFieldConfig``. It is forwarded to the parent
            constructor as-is; it is not called in this method.
    """
    # NOTE(review): presumably the parent checks `required_fields` and
    # then applies `fields_factory` to `data` -- confirm against the
    # DictEmbeddingItems implementation.
    super().__init__(
        data,
        modality="image",
        required_fields={"image_embeds", "image_sizes"},
        fields_factory=fields_factory,
    )
266
269
267
270
def get_image_size (self , index : int ) -> ImageSize :
@@ -274,13 +277,16 @@ class MiniCPMVVideoEmbeddingItems(DictEmbeddingItems):
274
277
def __init__(
    self,
    data: Mapping[str, torch.Tensor],
    fields_factory: Callable[
        [Mapping[str, torch.Tensor]],
        Mapping[str, MultiModalFieldConfig],
    ],
) -> None:
    """Initialize video embedding items from a mapping of tensors.

    Args:
        data: Mapping from field name to tensor. The parent constructor
            is told that ``"video_embeds"`` and ``"video_image_sizes"``
            are required fields.
        fields_factory: Callable that takes the tensor mapping and
            returns a mapping from field name to its
            ``MultiModalFieldConfig``. It is forwarded to the parent
            constructor as-is; it is not called in this method.
    """
    # NOTE(review): presumably the parent checks `required_fields` and
    # then applies `fields_factory` to `data` -- confirm against the
    # DictEmbeddingItems implementation.
    super().__init__(
        data,
        modality="video",
        required_fields={"video_embeds", "video_image_sizes"},
        fields_factory=fields_factory,
    )
285
291
286
292
def get_frame_size (self , index : int ) -> ImageSize :
@@ -300,7 +306,7 @@ def _parse_image_data(
300
306
if isinstance (data , dict ):
301
307
return MiniCPMVImageEmbeddingItems (
302
308
data ,
303
- fields_config = _minicpmv_field_config ( data ) ,
309
+ fields_factory = _minicpmv_field_config ,
304
310
)
305
311
306
312
return super ()._parse_image_data (data )
@@ -312,7 +318,7 @@ def _parse_video_data(
312
318
if isinstance (data , dict ):
313
319
return MiniCPMVVideoEmbeddingItems (
314
320
data ,
315
- fields_config = _minicpmv_field_config ( data ) ,
321
+ fields_factory = _minicpmv_field_config ,
316
322
)
317
323
318
324
return super ()._parse_video_data (data )
0 commit comments