From cf98de96b6bd8dbcc9545c77a6d72f451f232f82 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 11:07:11 +0800 Subject: [PATCH 01/16] add qwen-2.5-vl processor --- fastdeploy/input/ernie_vl_processor.py | 27 +- fastdeploy/input/mm_processor/__init__.py | 3 +- .../image_preprocessor_adaptive.py | 36 +- fastdeploy/input/mm_processor/process.py | 313 +++------ .../input/mm_processor/process_video.py | 167 +---- fastdeploy/input2/__init__.py | 15 + fastdeploy/input2/ernie_processor.py | 425 +++++++++++++ fastdeploy/input2/ernie_tokenizer.py | 394 ++++++++++++ fastdeploy/input2/ernie_vl_processor.py | 285 +++++++++ fastdeploy/input2/mm_processor/__init__.py | 23 + .../image_preprocessor/__init__.py | 20 + .../get_image_preprocessor.py | 34 + .../image_preprocessor_adaptive.py | 587 +++++++++++++++++ fastdeploy/input2/mm_processor/process.py | 512 +++++++++++++++ .../input2/mm_processor/process_video.py | 205 ++++++ .../mm_processor/tokenizer/__init__.py | 0 .../mm_processor/tokenizer/tokenizer_vl.py | 0 .../mm_processor/utils/Roboto-Regular.ttf | Bin 0 -> 146004 bytes .../input2/mm_processor/utils/__init__.py | 15 + .../input2/mm_processor/utils/io_utils.py | 264 ++++++++ .../mm_processor/utils/render_timestamp.py | 103 +++ .../input2/mm_processor/utils/video_utils.py | 83 +++ fastdeploy/input2/preprocess.py | 101 +++ fastdeploy/input2/text_processor.py | 602 ++++++++++++++++++ 24 files changed, 3805 insertions(+), 409 deletions(-) create mode 100644 fastdeploy/input2/__init__.py create mode 100644 fastdeploy/input2/ernie_processor.py create mode 100644 fastdeploy/input2/ernie_tokenizer.py create mode 100644 fastdeploy/input2/ernie_vl_processor.py create mode 100644 fastdeploy/input2/mm_processor/__init__.py create mode 100644 fastdeploy/input2/mm_processor/image_preprocessor/__init__.py create mode 100644 fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py create mode 100644 fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py create mode 100644 fastdeploy/input2/mm_processor/process.py create mode 100644 fastdeploy/input2/mm_processor/process_video.py rename fastdeploy/{input => input2}/mm_processor/tokenizer/__init__.py (100%) rename fastdeploy/{input => input2}/mm_processor/tokenizer/tokenizer_vl.py (100%) create mode 100644 fastdeploy/input2/mm_processor/utils/Roboto-Regular.ttf create mode 100644 fastdeploy/input2/mm_processor/utils/__init__.py create mode 100644 fastdeploy/input2/mm_processor/utils/io_utils.py create mode 100644 fastdeploy/input2/mm_processor/utils/render_timestamp.py create mode 100644 fastdeploy/input2/mm_processor/utils/video_utils.py create mode 100644 fastdeploy/input2/preprocess.py create mode 100644 fastdeploy/input2/text_processor.py diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index 63ae5bc310..d7ce66e932 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -43,8 +43,9 @@ def __init__( image_preprocessor_name=preprocessor_path, **processor_kwargs, ) - self.ernie_processor.eval() - self.image_patch_id = self.ernie_processor.image_patch_id + + # self.ernie_processor.eval() + self.image_patch_id = self.ernie_processor.image_token_id self.spatial_conv_size = self.ernie_processor.spatial_conv_size self.decode_status = dict() @@ -249,14 +250,20 @@ def append_generated_tokens(self, multimodal_inputs, generated_token_ids): def pack_outputs(self, outs): # Stack or nullify image-related fields - 
if not outs["images"]: - outs["images"] = None - outs["grid_thw"] = None - outs["image_type_ids"] = None - else: - outs["images"] = np.vstack(outs["images"]) - outs["grid_thw"] = np.vstack(outs["grid_thw"]) - outs["image_type_ids"] = np.array(outs["image_type_ids"]) + # if not outs["images"]: + # outs["images"] = None + # outs["grid_thw"] = None + # outs["image_type_ids"] = None + # else: + # outs["images"] = np.vstack(outs["images"]) + # outs["grid_thw"] = np.vstack(outs["grid_thw"]) + # outs["image_type_ids"] = np.array(outs["image_type_ids"]) + + outs["images"] = np.array(outs["images"]) + outs["grid_thw"] = np.array(outs["grid_thw"]) + # outs["pixel_values_videos"] = np.array(outs["pixel_values_videos"]) + # outs["video_grid_thw"] = np.array(outs["video_grid_thw"]) + outs["image_type_ids"] = np.array(outs["image_type_ids"]) outs["image_patch_id"] = self.image_patch_id # Convert lists to arrays diff --git a/fastdeploy/input/mm_processor/__init__.py b/fastdeploy/input/mm_processor/__init__.py index ba59bc1654..5a97e41863 100644 --- a/fastdeploy/input/mm_processor/__init__.py +++ b/fastdeploy/input/mm_processor/__init__.py @@ -14,10 +14,9 @@ # limitations under the License. """ -from .process import IDS_TYPE_FLAG, DataProcessor, fancy_print +from .process import IDS_TYPE_FLAG, DataProcessor __all__ = [ "DataProcessor", - "fancy_print", "IDS_TYPE_FLAG", ] diff --git a/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py b/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py index 15b15a4d22..c86d8046e3 100644 --- a/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py +++ b/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py @@ -153,7 +153,7 @@ class AdaptiveImageProcessor(BaseImageProcessor): The max pixels of the image to resize the image. patch_size (`int`, *optional*, defaults to 14): The spacial patch size of the vision encoder. - temporal_conv_size (`int`, *optional*, defaults to 2): + temporal_patch_size (`int`, *optional*, defaults to 2): The temporal conv size in resampler. merge_size (`int`, *optional*, defaults to 2): The merge size of the vision encoder to llm encoder. 
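The defaults above (patch_size 14, merge_size 2, min_pixels 56 * 56, max_pixels 28 * 28 * 1280) fix how many vision tokens one image can produce. The helper below is a standalone sketch of that arithmetic, assuming Qwen2-VL-style rounding to multiples of patch_size * merge_size; the function name and exact rounding rule are assumptions, not the code inside AdaptiveImageProcessor.

import math

def sketch_resize(height, width, patch_size=14, merge_size=2,
                  min_pixels=56 * 56, max_pixels=28 * 28 * 1280):
    # Round both sides to a multiple of patch_size * merge_size, then rescale so
    # the total pixel count stays inside [min_pixels, max_pixels].
    factor = patch_size * merge_size
    h = max(factor, round(height / factor) * factor)
    w = max(factor, round(width / factor) * factor)
    if h * w > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        h = math.floor(height / beta / factor) * factor
        w = math.floor(width / beta / factor) * factor
    elif h * w < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h = math.ceil(height * beta / factor) * factor
        w = math.ceil(width * beta / factor) * factor
    return h, w

h, w = sketch_resize(1080, 1920)
grid_h, grid_w = h // 14, w // 14          # 14x14 vision patches
llm_tokens = (grid_h * grid_w) // (2 * 2)  # tokens seen by the LLM after 2x2 merge
print(h, w, grid_h, grid_w, llm_tokens)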
@@ -179,7 +179,7 @@ def __init__( min_pixels: int = 56 * 56, max_pixels: int = 28 * 28 * 1280, patch_size: int = 14, - temporal_conv_size: int = 2, + temporal_patch_size: int = 2, merge_size: int = 2, **kwargs, ) -> None: @@ -195,7 +195,7 @@ def __init__( self.min_pixels = min_pixels self.max_pixels = max_pixels self.patch_size = patch_size - self.temporal_conv_size = temporal_conv_size + self.temporal_patch_size = temporal_patch_size self.merge_size = merge_size self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels} self.do_convert_rgb = do_convert_rgb @@ -332,10 +332,18 @@ def _preprocess( resample=resample, data_format=input_data_format, ) + + if do_rescale and do_normalize: + image_mean = np.array(image_mean, dtype=np.float32) * (1.0 / rescale_factor) + image_std = np.array(image_std, dtype=np.float32) * (1.0 / rescale_factor) + do_rescale = False + if do_rescale: + image = image.astype("float32") image = rescale(image, scale=rescale_factor, data_format=input_data_format) if do_normalize: + image = image.astype("float32") image = normalize( image=image, mean=image_mean, @@ -344,14 +352,21 @@ def _preprocess( ) image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) # [C, H, W] - processed_images.append(image) + patches = np.array(processed_images) + if patches.shape[0] % self.temporal_patch_size != 0: + repeats = np.repeat( + patches[-1][np.newaxis], self.temporal_patch_size - (patches.shape[0] % self.temporal_patch_size), axis=0 + ) + patches = np.concatenate([patches, repeats], axis=0) + if data_format == ChannelDimension.LAST: patches = patches.transpose([0, 3, 1, 2]) + + grid_t, channel = patches.shape[:2] + grid_t = grid_t // self.temporal_patch_size - channel = patches.shape[1] # [time, C, H, W] - grid_t = patches.shape[0] grid_h, grid_w = ( resized_height // self.patch_size, resized_width // self.patch_size, @@ -359,6 +374,7 @@ def _preprocess( patches = patches.reshape( [ grid_t, + self.temporal_patch_size, channel, grid_h // self.merge_size, self.merge_size, @@ -368,15 +384,15 @@ def _preprocess( self.patch_size, ] ) - # [grid_t, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, psz, psz] - patches = patches.transpose([0, 2, 5, 3, 6, 1, 4, 7]) + # [grid_t, temporal_patch_size, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, psz, psz] + patches = patches.transpose([0, 3, 6, 4, 7, 2, 1, 5, 8]) flatten_patches = patches.reshape( [ grid_t * grid_h * grid_w, - channel * self.patch_size * self.patch_size, + channel * self.temporal_patch_size * self.patch_size * self.patch_size, ] - ) # [grid_t * grid_h * grid_w, C * psz * psz] + ) return flatten_patches, (grid_t, grid_h, grid_w) diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index ea2559a0fe..bcdff21313 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -16,9 +16,6 @@ """ """ process.py """ -import copy -import os -from collections import defaultdict from typing import Any, Dict, List, Union import numpy as np @@ -26,76 +23,37 @@ from PIL import Image from fastdeploy.entrypoints.chat_utils import parse_chat_messages -from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer from fastdeploy.utils import data_processor_logger +from paddleformers.transformers import AutoTokenizer + from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor -from .process_video import read_frames_decord, read_video_decord -from 
.utils.render_timestamp import render_frame_timestamp +from .process_video import read_video_decord IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} -def fancy_print(input_ids, tokenizer, image_patch_id=None): - """ - input_ids: input_ids - tokenizer: the tokenizer of models - """ - i = 0 - res = "" - text_ids = [] - real_image_token_len = 0 - while i < len(input_ids): - if input_ids[i] == image_patch_id: - if len(text_ids) > 0: - res += tokenizer.decode(text_ids) - text_ids = [] - - real_image_token_len += 1 - else: - if real_image_token_len != 0: - res += f"<|IMAGE@{real_image_token_len}|>" - real_image_token_len = 0 - - text_ids.append(input_ids[i]) - - i += 1 - if len(text_ids) > 0: - - res += tokenizer.decode(text_ids) - text_ids = [] - return res - - class DataProcessor: """ Processes multimodal chat messages into model-ready inputs, handling text, images, and videos with 3D positional embeddings. """ - CLS_TOKEN = "<|begin_of_sentence|>" - SEP_TOKEN = "<|end_of_sentence|>" - EOS_TOKEN = "" - IMG_START = "<|IMAGE_START|>" - IMG_END = "<|IMAGE_END|>" - VID_START = "<|VIDEO_START|>" - VID_END = "<|VIDEO_END|>" - def __init__( self, tokenizer_name: str, image_preprocessor_name: str, spatial_conv_size: int = 2, temporal_conv_size: int = 2, - image_min_pixels: int = 4 * 28 * 28, - image_max_pixels: int = 6177 * 28 * 28, - video_min_pixels: int = 299 * 28 * 28, - video_max_pixels: int = 1196 * 28 * 28, - video_target_frames: int = -1, - video_frames_sample: str = "leading", - video_max_frames: int = 180, - video_min_frames: int = 16, - video_fps: int = 2, + image_min_pixels: int = 3136, + image_max_pixels: int = 12845056, + video_min_pixels: int = 3136, + video_max_pixels: int = 12845056, + # video_target_frames: int = -1, + # video_frames_sample: str = "leading", + # video_max_frames: int = 180, + # video_min_frames: int = 16, + # video_fps: int = 2, **kwargs, ) -> None: # Tokenizer and image preprocessor @@ -115,28 +73,26 @@ def __init__( self.video_max_pixels = video_max_pixels # Video sampling parameters - self.target_frames = video_target_frames - self.frames_sample = video_frames_sample - self.max_frames = video_max_frames - self.min_frames = video_min_frames - self.fps = video_fps + # self.target_frames = video_target_frames + # self.frames_sample = video_frames_sample + # self.max_frames = video_max_frames + # self.min_frames = video_min_frames + # self.fps = video_fps # Special tokens and IDs - self.cls_token = self.CLS_TOKEN - self.sep_token = self.SEP_TOKEN - self.eos_token = self.EOS_TOKEN - self.image_start = self.IMG_START - self.image_end = self.IMG_END - self.video_start = self.VID_START - self.video_end = self.VID_END - self.image_patch_id = self.tokenizer.convert_tokens_to_ids("<|IMAGE_PLACEHOLDER|>") - self.image_start_id = self.tokenizer.convert_tokens_to_ids(self.image_start) - self.video_start_id = self.tokenizer.convert_tokens_to_ids(self.video_start) - self.sep_token_id = self.tokenizer.convert_tokens_to_ids(self.sep_token) - self.eos_token_id = self.tokenizer.convert_tokens_to_ids(self.eos_token) - - self.token_type_mapping = self._build_token_type_mapping() - self.is_training = True + # self.cls_token = "<|im_start|>" + # self.eos_token = "<|im_end|>" + self.vision_start = "<|vision_start|>" + self.vision_end = "<|vision_end|>" + self.image_token = "<|image_pad|>" + self.video_token = "<|video_pad|>" + + self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token) + self.video_token_id = 
self.tokenizer.convert_tokens_to_ids(self.video_token) + + self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start) + self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_end) + self.role_prefixes = { "system": "", "user": "User: ", @@ -144,26 +100,6 @@ def __init__( "assistant": "Assistant: ", } - def _build_token_type_mapping(self) -> Dict[Any, int]: - mapping = defaultdict(lambda: IDS_TYPE_FLAG["text"]) - for token in ( - self.IMG_START, - self.IMG_END, - self.VID_START, - self.VID_END, - ): - mapping[token] = IDS_TYPE_FLAG["image"] - mapping[self.image_patch_id] = IDS_TYPE_FLAG["image"] - return mapping - - def train(self) -> None: - """Enable training mode (produces labels).""" - self.is_training = True - - def eval(self) -> None: - """Enable evaluation mode (doesn't produce labels).""" - self.is_training = False - def text2ids(self, text, images=None, videos=None): """ Convert chat text into model inputs. @@ -251,48 +187,46 @@ def request2ids( "video", ]: image_message_list.append(item) + request["messages"] = messages prompt_token_ids = self.apply_chat_template(request) if len(prompt_token_ids) == 0: raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - image_start_index = 0 - image_message_index = 0 + + vision_start_index = 0 + vision_message_index = 0 for i in range(len(prompt_token_ids)): - if prompt_token_ids[i] in [ - self.image_start_id, - self.video_start_id, - ]: - self._add_text(prompt_token_ids[image_start_index : i + 1], outputs) - image_start_index = i + 1 - image_message = image_message_list[image_message_index] + if prompt_token_ids[i] == self.vision_start_id : + self._add_text(prompt_token_ids[vision_start_index : i + 1], outputs) + + vision_start_index = i + 1 + image_message = image_message_list[vision_message_index] + if image_message["type"] == "image": img = image_message.get("image") if img is None: continue outputs["pic_cnt"] += 1 self._add_image(img, outputs) + elif image_message["type"] == "video": video_bytes = image_message.get("video") if video_bytes is None: continue frames = self._load_and_process_video(video_bytes, image_message) + # ----------- + # mm_parser = MultiModalPartParser() + # fimg = mm_parser.parse_image("file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg") + # for i in range(len(frames)): + # frames[i] = fimg.copy() + outputs["video_cnt"] += 1 self._add_video(frames, outputs) - image_message_index += 1 - self._add_text(prompt_token_ids[image_start_index:], outputs) - if self.is_training: - assert tgts, "training must give tgt !" 
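request2ids above splices vision content in at every <|vision_start|> position and pads the prompt with (patches_h * patches_w) // spatial_conv_size**2 placeholder ids per image. Below is a self-contained sketch of that splicing step; the token id constants are illustrative assumptions, not values read from the tokenizer.

VISION_START_ID = 151652   # assumption: illustrative ids for the sketch only
IMAGE_PAD_ID = 151655

def splice_image_tokens(prompt_ids, image_grids, merge_size=2):
    # After every vision-start id, insert (h * w) // merge**2 image placeholder
    # ids for the matching image, in prompt order.
    out, img_idx = [], 0
    for tok in prompt_ids:
        out.append(tok)
        if tok == VISION_START_ID and img_idx < len(image_grids):
            h, w = image_grids[img_idx]
            out.extend([IMAGE_PAD_ID] * ((h * w) // (merge_size ** 2)))
            img_idx += 1
    return out

# a 28x28-patch image contributes (28 * 28) // 4 = 196 placeholder tokens
print(len(splice_image_tokens([1, VISION_START_ID, 2], [(28, 28)])))  # 199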
- self._extract_labels(outputs, tgts) - return outputs + vision_message_index += 1 - def _add_special_token(self, token: Union[str, int], outputs: Dict) -> None: - token_id = token if isinstance(token, int) else self.tokenizer.convert_tokens_to_ids(token) - outputs["input_ids"].append(token_id) - outputs["token_type_ids"].append(self.token_type_mapping[token]) - pos = outputs["cur_position"] - outputs["position_ids"].append([pos] * 3) - outputs["cur_position"] += 1 + self._add_text(prompt_token_ids[vision_start_index:], outputs) + return outputs def _add_text(self, tokens, outputs: Dict) -> None: if isinstance(tokens, str): @@ -314,7 +248,7 @@ def _add_image(self, img, outputs: Dict) -> None: )[1] num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2) - outputs["input_ids"].extend([self.image_patch_id] * num_tokens) + outputs["input_ids"].extend([self.image_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) pos_ids = self._compute_3d_positions(1, patches_h, patches_w, outputs["cur_position"]) @@ -322,14 +256,21 @@ def _add_image(self, img, outputs: Dict) -> None: outputs["cur_position"] = np.max(pos_ids) + 1 # Preprocess pixels + # image_mean = [0.48145466, 0.4578275, 0.40821073] + # image_std = [0.26862954, 0.26130258, 0.27577711] + # do_rescale = True + # do_normalize = True ret = self.image_preprocessor.preprocess( images=[img.convert("RGB")], - do_normalize=False, - do_rescale=False, + # do_normalize=do_normalize, + # image_mean=image_mean, + # image_std=image_std, + # do_rescale=do_rescale, predetermined_grid_thw=np.array([[patches_h, patches_w]]), do_convert_rgb=True, input_data_format=ChannelDimension.LAST, ) + outputs["images"].append(ret["pixel_values"]) outputs["grid_thw"].append(ret["image_grid_thw"]) outputs["image_type_ids"].append(0) @@ -345,112 +286,48 @@ def _add_video(self, frames, outputs: Dict) -> None: num_tokens = (num_frames * patches_h * patches_w) // (self.spatial_conv_size**2 * self.temporal_conv_size) pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + + # rescale_factor = 1 / 255 + # image_mean = [0.48145466, 0.4578275, 0.40821073] + # image_std = [0.26862954, 0.26130258, 0.27577711] + # do_rescale = True + # do_normalize = True ret = self.image_preprocessor.preprocess( images=None, videos=pixel_stack, - do_normalize=False, - do_rescale=False, + # do_normalize=do_normalize, + # image_mean=image_mean, + # image_std=image_std, + # do_rescale=do_rescale, predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), do_convert_rgb=True, input_data_format=ChannelDimension.LAST, ) + outputs["images"].append(ret["pixel_values_videos"]) outputs["grid_thw"].append(ret["video_grid_thw"]) + # outputs["pixel_values_videos"].append(ret["pixel_values_videos"]) + # outputs["video_grid_thw"].append(ret["video_grid_thw"]) outputs["image_type_ids"].extend([1] * num_frames) - outputs["input_ids"].extend([self.image_patch_id] * num_tokens) + # num_tokens = ret["video_grid_thw"] + outputs["input_ids"].extend([self.video_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) pos_ids = self._compute_3d_positions(num_frames, patches_h, patches_w, outputs["cur_position"]) outputs["position_ids"].extend(pos_ids) outputs["cur_position"] = np.max(pos_ids) + 1 - def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None: - input_ids = copy.deepcopy(outputs["input_ids"]) - labels = [self.tokenizer.ignored_index] * len(input_ids) - - tgt_count = 
input_ids.count(self.sep_token_id) - assert tgt_count == len(tgts), f"len(tgts) != len(src) {len(tgts)} vs {tgt_count}" - - tgt_index = 0 - for i, token_id in enumerate(input_ids): - if token_id == self.sep_token_id: - labels_token = self.tokenizer.tokenize(tgts[tgt_index]) - labels_token_id = self.tokenizer.convert_tokens_to_ids(labels_token) - labels[i - len(labels_token_id) : i] = labels_token_id - labels[i] = self.eos_token_id # - tgt_index += 1 - - outputs["labels"] = labels - def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: - reader, meta, path = read_video_decord(url, save_to_disk=False) - - video_frame_args = dict() - video_frame_args["fps"] = item.get("fps", self.fps) - video_frame_args["min_frames"] = item.get("min_frames", self.min_frames) - video_frame_args["max_frames"] = item.get("max_frames", self.max_frames) - video_frame_args["target_frames"] = item.get("target_frames", self.target_frames) - video_frame_args["frames_sample"] = item.get("frames_sample", self.frames_sample) - - video_frame_args = self._set_video_frame_args(video_frame_args, meta) - - frames_data, _, timestamps = read_frames_decord( - path, - reader, - meta, - target_frames=video_frame_args["target_frames"], - target_fps=video_frame_args["fps"], - frames_sample=video_frame_args["frames_sample"], - save_to_disk=False, - ) + reader, meta = read_video_decord(url, save_to_disk=False) - frames: List[Image.Image] = [] - for img_array, ts in zip(frames_data, timestamps): - frames.append(render_frame_timestamp(img_array, ts)) - # Ensure even number of frames for temporal conv - if len(frames) % 2 != 0: - frames.append(copy.deepcopy(frames[-1])) - return frames + frames = [] + for i in range(meta["num_of_frame"]): + frame = reader[i].asnumpy() + image = Image.fromarray(frame, "RGB") + frames.append(image) - def _set_video_frame_args(self, video_frame_args, video_meta): - """ - 根据已知参数和优先级,设定最终的抽帧参数 - """ - # 优先级:video_target_frames > (video_min_frames, video_max_frames) > video_fps - if video_frame_args["target_frames"] > 0: - if video_frame_args["fps"] >= 0: - raise ValueError("fps must be negative if target_frames is given") - if ( - video_frame_args["min_frames"] > 0 - and video_frame_args["target_frames"] < video_frame_args["min_frames"] - ): - raise ValueError("target_frames must be larger than min_frames") - if ( - video_frame_args["max_frames"] > 0 - and video_frame_args["target_frames"] > video_frame_args["max_frames"] - ): - raise ValueError("target_frames must be smaller than max_frames") - else: - if video_frame_args["fps"] < 0: - raise ValueError("Must provide either positive target_fps or positive target_frames.") - # 先计算在video_fps下抽到的帧数 - frames_to_extract = int(video_meta["duration"] * video_frame_args["fps"]) - # 判断是否在目标区间内,如果不是,则取target_frames为上界或下界 - if ( - video_frame_args["min_frames"] > 0 - and video_frame_args["max_frames"] > 0 - and video_frame_args["min_frames"] > video_frame_args["max_frames"] - ): - raise ValueError("min_frames must be smaller than max_frames") - if video_frame_args["min_frames"] > 0 and frames_to_extract < video_frame_args["min_frames"]: - video_frame_args["target_frames"] = video_frame_args["min_frames"] - video_frame_args["fps"] = -1 - if video_frame_args["max_frames"] > 0 and frames_to_extract > video_frame_args["max_frames"]: - video_frame_args["target_frames"] = video_frame_args["max_frames"] - video_frame_args["fps"] = -1 - - return video_frame_args + return frames def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> 
List[List[int]]: # Downsample time if needed @@ -470,16 +347,7 @@ def _load_tokenizer(self): Returns: tokenizer (AutoTokenizer) """ - vocab_file_names = [ - "tokenizer.model", - "spm.model", - "ernie_token_100k.model", - ] - for i in range(len(vocab_file_names)): - if os.path.exists(os.path.join(self.model_name_or_path, vocab_file_names[i])): - ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] - break - self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) + self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, padding_side="left", use_fast=True) def apply_chat_template(self, request): """ @@ -495,18 +363,17 @@ def apply_chat_template(self, request): if self.tokenizer.chat_template is None: raise ValueError("This model does not support chat_template.") - prompt_token_str = ( - self.tokenizer.apply_chat_template( - request, - tokenize=False, - add_generation_prompt=request.get("add_generation_prompt", True), - ) - .replace("<|image@placeholder|>", "") - .replace("<|video@placeholder|>", "") + prompt_token_str = self.tokenizer.apply_chat_template( + request["messages"], + tokenize=False, + add_generation_prompt=request.get("add_generation_prompt", True), ) + prompt_token_str = prompt_token_str.replace(self.image_token, "").replace(self.video_token, "") + tokens = self.tokenizer.tokenize(prompt_token_str) token_ids = self.tokenizer.convert_tokens_to_ids(tokens) data_processor_logger.info( f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" ) return token_ids + diff --git a/fastdeploy/input/mm_processor/process_video.py b/fastdeploy/input/mm_processor/process_video.py index 91120096c7..e82456bfcf 100644 --- a/fastdeploy/input/mm_processor/process_video.py +++ b/fastdeploy/input/mm_processor/process_video.py @@ -27,179 +27,18 @@ from .utils.video_utils import VideoReaderWrapper -def read_video_decord(video_path, save_to_disk): +def read_video_decord(video_path): """get reader and meta by decord""" - # video_path = get_downloadable(video_path, save_to_disk=save_to_disk) if isinstance(video_path, VideoReaderWrapper): video_reader = video_path else: if isinstance(video_path, bytes): video_path = io.BytesIO(video_path) video_reader = VideoReaderWrapper(video_path, num_threads=1) + vlen = len(video_reader) fps = video_reader.get_avg_fps() duration = vlen / float(fps) video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} - - return video_reader, video_meta, video_path - - -def get_frame_indices( - vlen, - target_frames=-1, - target_fps=-1, - frames_sample="middle", - fix_start=None, - input_fps=-1, -): - """ - 取出对应的frame index - """ - assert frames_sample in ["rand", "middle", "leading"] - if target_frames > 0: - assert target_fps <= 0, "target_fps must be negative if target_frames is given." - if target_frames > vlen: - acc_samples = vlen - data_processor_logger.info( - f"target_frames={target_frames} is larger than video length {vlen}, " - f"will sample {acc_samples} frames." - ) - else: - acc_samples = target_frames - data_processor_logger.debug(f"sampling at target_frames={target_frames}, frames_sample={frames_sample}") - - # split the video into `acc_samples` intervals, and sample from each interval. 
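The rewritten _load_and_process_video above decodes every frame and leaves the fps/min_frames/max_frames knobs commented out, while the helper being removed below sampled frame indices first. For reference, a minimal fps-based sampler in the same spirit, using only numpy; the function name and default values are assumptions for the sketch.

import numpy as np

def sketch_frame_indices(num_frames, native_fps, target_fps=2.0, max_frames=180):
    # Pick roughly one frame every native_fps / target_fps frames, capped at
    # max_frames, instead of decoding every frame of the video.
    step = max(1.0, native_fps / target_fps)
    idx = np.arange(0, num_frames, step).round().astype(int)
    if len(idx) > max_frames:
        idx = np.linspace(0, num_frames - 1, max_frames).round().astype(int)
    return idx.tolist()

print(sketch_frame_indices(num_frames=300, native_fps=30.0))  # every 15th frame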
- intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int) - ranges = [] - for idx, interv in enumerate(intervals[:-1]): - ranges.append((interv, intervals[idx + 1] - 1)) - if frames_sample == "rand": - try: - frame_indices = [random.choice(range(x[0], x[1])) for x in ranges] - except Exception: - frame_indices = np.random.permutation(vlen)[:acc_samples] - frame_indices.sort() - frame_indices = list(frame_indices) - elif fix_start is not None: - frame_indices = [x[0] + fix_start for x in ranges] - elif frames_sample == "leading": - frame_indices = [x[0] for x in ranges] - elif frames_sample == "middle": - frame_indices = [(x[0] + x[1]) // 2 for x in ranges] - else: - raise NotImplementedError - - elif target_fps > 0: - assert target_frames <= 0, "target_frames must be negative if target_fps is given." - assert input_fps > 0, "input_fps must be provided if target_fps is given." - data_processor_logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}") - duration = float(vlen) / input_fps - delta = 1 / target_fps # gap between frames, this is also the clip length each frame represents - if frames_sample == "middle": - frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) - elif frames_sample == "leading": - frame_seconds = np.arange(0, duration, delta) - if frames_sample == "rand": - frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) - rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5 - frame_seconds += rand_offset * delta - frame_indices = np.around(frame_seconds * input_fps).astype(int) - frame_indices = [e for e in frame_indices if e < vlen] - - else: - raise ValueError("Must provide either positive target_fps or positive target_frames.") - - return frame_indices - - -def read_frames_decord( - video_path, - video_reader, - video_meta, - target_frames=-1, - target_fps=-1, - frames_sample="middle", - fix_start=None, - save_to_disk=False, - cache_dir=EXTRACTED_FRAME_DIR, - frame_indices=None, - tol=10, -): - """get frames by decord""" - - if frame_indices is None: - frame_indices = get_frame_indices( - video_meta["num_of_frame"], - target_frames=target_frames, - target_fps=target_fps, - frames_sample=frames_sample, - fix_start=fix_start, - input_fps=video_meta["fps"], - ) - - frames = [] - for frame_indice_index in range(0, len(frame_indices)): - frame_indice = frame_indices[frame_indice_index] - try: - frames.append(video_reader[frame_indice].asnumpy()) # (T, H, W, C) - except Exception as e: - data_processor_logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}") - previous_counter = 1 - later_counter = 1 - previous_after_flag = True - if frame_indice == 0 or frame_indice == len(video_reader) - 1: - cur_tol = tol * 2 - else: - cur_tol = tol - while previous_counter < cur_tol or later_counter < cur_tol: - if previous_after_flag: - if frame_indice - previous_counter < 0: - previous_counter += 1 - previous_after_flag = not previous_after_flag - continue - try: - frames.append(video_reader[frame_indice - previous_counter].asnumpy()) - data_processor_logger.info( - f"replace {frame_indice}-th frame with {frame_indice-previous_counter}-th frame" - ) - frame_indices[frame_indice_index] = frame_indice - previous_counter - break - except Exception as e: - previous_counter += 1 - data_processor_logger.info(f"error: {e}") - else: - if frame_indice + later_counter >= len(video_reader): - later_counter += 1 - previous_after_flag = not previous_after_flag - continue - try: - 
frames.append(video_reader[frame_indice + later_counter].asnumpy()) - data_processor_logger.info( - f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame" - ) - frame_indices[frame_indice_index] = frame_indice + later_counter - break - except Exception: - later_counter += 1 - previous_after_flag = not previous_after_flag - - frames = np.stack(frames, axis=0) - assert len(frames) == len(frame_indices), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}" - - ret = [] - - url_sha1 = get_filename() - for idx, frame in enumerate(frames): - tmp = Image.fromarray(frame, "RGB") - if save_to_disk: - save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png") - if not os.path.exists(os.path.dirname(save_path)): - os.makedirs(os.path.dirname(save_path)) - tmp.save(save_path) - tmp = save_path - ret.append(tmp) - - time_stamps = [frame_idx * video_meta["duration"] / video_meta["num_of_frame"] for frame_idx in frame_indices] - - return ret, frame_indices, time_stamps + return video_reader, video_meta diff --git a/fastdeploy/input2/__init__.py b/fastdeploy/input2/__init__.py new file mode 100644 index 0000000000..f4ede90624 --- /dev/null +++ b/fastdeploy/input2/__init__.py @@ -0,0 +1,15 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" diff --git a/fastdeploy/input2/ernie_processor.py b/fastdeploy/input2/ernie_processor.py new file mode 100644 index 0000000000..28d91bdbf8 --- /dev/null +++ b/fastdeploy/input2/ernie_processor.py @@ -0,0 +1,425 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +import os + +import numpy as np +from paddleformers.generation import GenerationConfig + +from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer +from fastdeploy.input.text_processor import BaseDataProcessor +from fastdeploy.utils import data_processor_logger + +_SAMPLING_EPS = 1e-5 + + +class ErnieProcessor(BaseDataProcessor): + """ + 初始化模型实例。 + + Args: + model_name_or_path (str): 模型名称或路径。 + + Attributes: + model_name_or_path (str): 存储模型名称或路径。 + decode_status (dict): 存储解码状态信息。 + tokenizer (object): 存储分词器实例。 + eos_token_ids (list): 存储结束符号的token ID列表。 + eos_token_id_len (int): 存储结束符号的token ID列表的长度。 + pad_token_id (int): 存储填充符号的token ID。 + """ + + def __init__(self, model_name_or_path, reasoning_parser_obj=None): + + self.model_name_or_path = model_name_or_path + data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") + + # Generation config + try: + self.generation_config = GenerationConfig.from_pretrained(self.model_name_or_path) + except Exception as e: + data_processor_logger.warning( + f"Can't find generation config, so it will not use " + f"generation_config field in the model config, details={e}" + ) + self.generation_config = None + + self.decode_status = dict() + self.thinking_parser_dict = dict() + self._load_tokenizer() + data_processor_logger.info( + f"tokenizer information: bos_token is {self.tokenizer.bos_token} \ + {self.tokenizer.bos_token_id}, \ + eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id} " + ) + from paddleformers.trl.llm_utils import get_eos_token_id + + self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) + self.eos_token_id_len = len(self.eos_token_ids) + self.pad_token_id = self.get_pad_id() + self.reasoning_parser = None + if reasoning_parser_obj: + self.reasoning_parser = reasoning_parser_obj(self.tokenizer) + + def process_request(self, request, max_model_len=None, **kwargs): + """ + Preprocess the request + + Args: + request (Dict): may contain text and messages fields + + Returns: + bool: Whether preprocessing is successful + str: error message + """ + request = self._apply_default_parameters(request) + if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0: + request.eos_token_ids = self.eos_token_ids + stop_sequences = request.get("stop", []) + if stop_sequences is not None and len(stop_sequences) != 0: + stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) + request.set("stop_token_ids", stop_seqs) + request.set("stop_seqs_len", stop_seqs_len) + + if request.prompt_token_ids is None or len(request.prompt_token_ids) == 0: + if request.prompt is None and request.messages is None: + raise ValueError(f"The request should have `prompt_token_ids`, `prompt` or `messages`: {request}.") + if request.prompt is not None: + prompt = request.prompt if request.prompt is not None else request.messages[0] + prompt = prompt[0] if isinstance(prompt, list) else prompt + tokens = self.tokenizer.tokenize(prompt) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + request.prompt_token_ids = token_ids + data_processor_logger.info(f"req_id:{request.request_id}, tokens:{tokens}, token_ids: {token_ids}") + else: + request.prompt_token_ids = self.messages2ids(request.to_dict()) + + if len(request.prompt_token_ids) == 0: + raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + if max_model_len is not None and len(request.prompt_token_ids) > max_model_len: + request.prompt_token_ids = request.prompt_token_ids[: max_model_len - 
1] + if request.get("max_tokens") is None: + request.set( + "max_tokens", + max(1, max_model_len - len(request.prompt_token_ids)), + ) + if request.get("temperature") < _SAMPLING_EPS: + # zero temperature is equivalent to greedy sampling + request.set("temperature", 1) + if request.get("top_p") < _SAMPLING_EPS: + request.set("top_p", _SAMPLING_EPS) + data_processor_logger.info(f"Processed request {request}") + return request + + def process_request_dict(self, request, max_model_len=None): + """ + Preprocess the request + + Args: + request (Dict): may contain text and messages fields + + Returns: + bool: Whether preprocessing is successful + str: error message + """ + request = self._apply_default_parameters(request) + if not request.get("eos_token_ids"): + request["eos_token_ids"] = self.eos_token_ids + + # processing stop_sequences + stop_sequences = request.get("stop", []) + if stop_sequences: + stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) + request["stop_token_ids"] = stop_seqs + request["stop_seqs_len"] = stop_seqs_len + + # processing prompt_token_ids + if not request.get("prompt_token_ids"): + if request.get("prompt") is None and request.get("messages") is None: + raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}") + if request.get("prompt"): + prompt = request.get("prompt") + prompt = prompt[0] if isinstance(prompt, list) else prompt + + tokens = self.tokenizer.tokenize(prompt) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + request["prompt_token_ids"] = token_ids + req_id = request.get("request_id", None) + data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") + else: + request["prompt_token_ids"] = self.messages2ids(request) + if len(request["prompt_token_ids"]) == 0: + raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + + # truncate prompts that exceed the length limit + if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len: + request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] + if request.get("max_tokens") is None: + request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) + if request.get("temperature") < _SAMPLING_EPS: + # zero temperature is equivalent to greedy sampling + request["temperature"] = 1 + if request.get("top_p") < _SAMPLING_EPS: + request["top_p"] = _SAMPLING_EPS + data_processor_logger.info(f"Processed request {request}") + + return request + + def process_response(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + req_id = response_dict.request_id + token_ids = response_dict.outputs.token_ids + + response_dict.usage = {"completion_tokens": response_dict.outputs.index + 1} + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + full_text = self.tokenizer.decode(token_ids) + if self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict.outputs.text = text + response_dict.outputs.reasoning_content = reasoning_content + else: + response_dict.outputs.text = full_text + data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}") + if response_dict.outputs.text == "" and response_dict.outputs.reasoning_content == "": + return None + return response_dict + + def 
process_response_dict(self, response_dict, stream, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + if stream: + return self.process_response_dict_streaming(response_dict, **kwargs) + else: + return self.process_response_dict_normal(response_dict, **kwargs) + + def process_response_dict_normal(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + enable_thinking = kwargs.get("enable_thinking") + token_ids = response_dict["outputs"]["token_ids"] + is_end = response_dict["finished"] + req_id = response_dict["request_id"] + if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id) + if is_end: + full_text = previous_texts + delta_text + if enable_thinking and self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict["outputs"]["text"] = text + response_dict["outputs"]["reasoning_content"] = reasoning_content + else: + response_dict["outputs"]["text"] = full_text + data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") + del self.decode_status[req_id] + return response_dict + + def process_response_dict_streaming(self, response_dict, **kwargs): + """ + Preprocess the response streaming + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + enable_thinking = kwargs.get("enable_thinking") + is_end = response_dict["finished"] + req_id = response_dict["request_id"] + token_ids = response_dict["outputs"]["token_ids"] + + if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id) + if enable_thinking and self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming( + previous_texts, + previous_texts + delta_text, + delta_text, + previous_token_ids, + previous_token_ids + token_ids, + token_ids, + ) + response_dict["outputs"]["text"] = text + response_dict["outputs"]["reasoning_content"] = reasoning_content + else: + response_dict["outputs"]["text"] = delta_text + if is_end: + data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") + del self.decode_status[req_id] + return response_dict + + def messages2ids(self, request_or_messages): + """ + Convert multi-turn messages into ID sequences. 
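process_response_dict_streaming above emits only the delta text per step, with ids2tokens carrying prefix/read offsets into the tokenizer's decode_token. The toy class below shows the same delta idea with a dummy decode function and a plain text buffer; it is a simplification for illustration, not the offset-based implementation.

class StreamingDecoder:
    def __init__(self, decode_fn):
        self.decode_fn = decode_fn          # maps a full list of ids to text
        self.token_ids = []                 # all ids seen so far for this request
        self.emitted = ""                   # text already sent to the client

    def step(self, new_ids):
        # Decode everything seen so far and return only the not-yet-emitted suffix.
        self.token_ids.extend(new_ids)
        full_text = self.decode_fn(self.token_ids)
        delta, self.emitted = full_text[len(self.emitted):], full_text
        return delta

vocab = {1: "Hel", 2: "lo", 3: " world"}
dec = StreamingDecoder(lambda ids: "".join(vocab[i] for i in ids))
print(dec.step([1]), "|", dec.step([2, 3]))  # Hel | lo world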
+ + Args: + request_or_messages: Either a request dict containing 'messages' field, + or a list of message dicts directly + + Returns: + List of token IDs as strings (converted from token objects) + """ + if self.tokenizer.chat_template is None: + raise ValueError("This model does not support chat_template.") + spliced_message = self.tokenizer.apply_chat_template( + request_or_messages, + tokenize=False, + split_special_tokens=False, + add_special_tokens=False, + ) + + req_id = None + if isinstance(request_or_messages, dict): + req_id = request_or_messages.get("request_id", None) + tokens = self.tokenizer.tokenize(spliced_message) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") + return token_ids + + def ids2tokens(self, token_id, task_id): + """ + token ids to strings + + Args: + token_ids (List[int]): token ids + task_id (str): task id + + Returns: + List[str]: strings + """ + + if task_id not in self.decode_status: + # prefix offset & read offset & history token ids & history token strings + self.decode_status[task_id] = [0, 0, [], ""] + + prefix_offset = self.decode_status[task_id][0] + read_offset = self.decode_status[task_id][1] + previous_token_ids = self.decode_status[task_id][2] + previous_texts = self.decode_status[task_id][3] + decode_str, prefix_offset, read_offset = self.tokenizer.decode_token( + previous_token_ids + token_id, prefix_offset, read_offset + ) + self.decode_status[task_id][0] = prefix_offset + self.decode_status[task_id][1] = read_offset + self.decode_status[task_id][2] += token_id + self.decode_status[task_id][3] += decode_str + + return decode_str, previous_token_ids, previous_texts + + def _load_tokenizer(self): + """ + load tokenizer + + Returns: + tokenizer (AutoTokenizer) + """ + vocab_file_names = [ + "tokenizer.model", + "spm.model", + "ernie_token_100k.model", + ] + for i in range(len(vocab_file_names)): + if os.path.exists(os.path.join(self.model_name_or_path, vocab_file_names[i])): + ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] + break + self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) + + def get_pad_id(self): + """ + get pad_token_id, if not pad_token_id, use eos_token + + Returns: + int: pad_token_id + """ + # if isinstance(self.tokenizer, (LlamaTokenizer, Llama3Tokenizer)) and not self.tokenizer.pad_token_id: + # return self.tokenizer.eos_token + return self.tokenizer.pad_token_id + + def pad_batch_data( + self, + insts, + pad_id=0, + return_seq_len=False, + return_array=True, + pad_style="right", + ): + """Pad the instances to the max sequence length in batch.""" + if len(insts) == 0: + padded_insts = np.array([[]], dtype=np.int64) if return_array else [[]] + if return_seq_len: + seq_len = np.array([], dtype=np.int64) if return_array else [] + return padded_insts, seq_len + return padded_insts + + max_len = max(map(len, insts)) + if pad_style == "left": + padded_insts = [[pad_id] * (max_len - len(inst)) + list(inst) for inst in insts] + else: + padded_insts = [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts] + if return_array: + padded_insts = np.array(padded_insts, dtype=np.int64).reshape([-1, max_len]) + + if return_seq_len: + seq_len = [len(inst) for inst in insts] + if return_array: + seq_len = np.array(seq_len, dtype=np.int64).reshape(-1, 1) + return padded_insts, seq_len + return padded_insts + + def update_stop_seq(self, stop_sequences): + """ + Update stop sequences 
from request. + """ + stop_seqs = [] + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + for seq in stop_sequences: + if seq != self.tokenizer.eos_token_id: + stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) + stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) + data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") + return stop_seqs, stop_seqs_len + + def process_logprob_response(self, token_ids, **kwargs): + full_text = self.tokenizer.decode(token_ids, **kwargs) + return full_text diff --git a/fastdeploy/input2/ernie_tokenizer.py b/fastdeploy/input2/ernie_tokenizer.py new file mode 100644 index 0000000000..2bbc798c5c --- /dev/null +++ b/fastdeploy/input2/ernie_tokenizer.py @@ -0,0 +1,394 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +# cipher_token=WjI1fQOvhN # do not edit this line + +import os +import re +from shutil import copyfile +from typing import Dict, List, Optional, Tuple + +import numpy as np +import paddle +import sentencepiece as spm +from paddleformers.transformers import PretrainedTokenizer +from paddleformers.transformers.tokenizer_utils_base import PaddingStrategy, TextInput +from paddleformers.utils.log import logger + + +class ErnieBotTokenizer(PretrainedTokenizer): + """ + 一个更好用的 `ErnieBotToknizer`, + 能 encode 目前 sft/ppo 阶段的特殊token,也支持多模态。 + """ + + resource_files_names = { + "vocab_file": "tokenizer.model", + } + pretrained_resource_files_map = {"vocab_file": {"ernie-bot-10b": None}} + pretrained_init_configuration = { + "ernie-bot-10b": {}, + } + model_input_names = [ + "input_ids", + "position_ids", + "attention_mask", + "labels", + ] + padding_side = "right" + + def __init__( + self, + vocab_file, + bos_token="", + cls_token="", + eos_token="", + mask_token="", + pad_token="", + sep_token="", + unk_token="", + additional_special_tokens=None, + verbose=False, + **kwargs, + ): + """doc""" + if additional_special_tokens is None: + additional_special_tokens = ["", ""] + super().__init__( + bos_token=bos_token, + cls_token=cls_token, + eos_token=eos_token, + mask_token=mask_token, + pad_token=pad_token, + sep_token=sep_token, + unk_token=unk_token, + additional_special_tokens=additional_special_tokens, + verbose=False, + **kwargs, + ) + self.vocab_file = vocab_file + self.sp_model = spm.SentencePieceProcessor() + self.sp_model.Load(vocab_file) + # pre-process map-type all spec token for decode accelerate. 
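For the stop-sequence handling earlier in this file (update_stop_seq feeding pad_batch_data with pad_id=-1 and return_seq_len=True), a standalone restatement of the shape it produces; the helper name is local to this sketch.

def sketch_pad_stop_seqs(stop_token_id_lists, pad_id=-1):
    # Right-pad tokenized stop sequences to the longest one and keep their true
    # lengths, so the engine can compare suffixes of fixed width.
    if not stop_token_id_lists:
        return [[]], []
    max_len = max(len(seq) for seq in stop_token_id_lists)
    padded = [list(seq) + [pad_id] * (max_len - len(seq)) for seq in stop_token_id_lists]
    lengths = [len(seq) for seq in stop_token_id_lists]
    return padded, lengths

print(sketch_pad_stop_seqs([[11, 12, 13], [42]]))  # ([[11, 12, 13], [42, -1, -1]], [3, 1])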
+ + @property + def space_token(self): + """doc""" + return "" + + @property + def space_token_id(self): + """doc""" + return self.sp_model.piece_to_id("") + + @property + def gend_token(self): + """doc""" + return "" + + @property + def gend_token_id(self): + """doc""" + return self.sp_model.piece_to_id("") + + @property + def im_start_id(self): + """doc""" + return self.sp_model.piece_to_id("<|im_start|>") + + @property + def im_end_id(self): + """doc""" + return self.sp_model.piece_to_id("<|im_end|>") + + @property + def vocab_size(self): + """doc""" + return self.sp_model.vocab_size() + + def get_vocab(self): + """doc""" + vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} + vocab.update(self.added_tokens_encoder) + return vocab + + def _tokenize(self, text): + """doc""" + return self.sp_model.encode_as_pieces(text) + + def _convert_token_to_id(self, token): + """doc""" + return self.sp_model.piece_to_id(token) + + def _convert_id_to_token(self, id): + """doc""" + return self.sp_model.id_to_piece(id) + + def spec_init(self): + if not hasattr(self, "all_spec_tok"): + self.all_spec_tok = set(self.all_special_tokens) + + def convert_tokens_to_string(self, tokens): + """Converts a sequence of tokens (string) in a single string.""" + self.spec_init() + current_sub_tokens = [] + out_string = "" + # prev_is_special = False + for token in tokens: + # make sure that special tokens are not decoded using sentencepiece model + if token in self.all_spec_tok: + # if not prev_is_special: + # out_string += " " + out_string += self.sp_model.decode(current_sub_tokens) + token + # prev_is_special = True + + current_sub_tokens = [] + else: + current_sub_tokens.append(token) + # prev_is_special = False + out_string += self.sp_model.decode(current_sub_tokens) + return out_string # .strip() + + def prepare_for_model(self, *args, **kwargs): + """doc""" + if "add_special_tokens" in kwargs: + kwargs.pop("add_special_tokens") + # logger.warning(f'ErnieBotTokenizer v2 does not support `add_special_tokens`') + return super().prepare_for_model(*args, **kwargs) + + def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]: + """ + Save the vocabulary and special tokens file to a directory. + Args: + save_directory (`str`): + The directory in which to save the vocabulary. + Returns: + `Tuple(str)`: Paths to the files saved. + """ + if not os.path.isdir(save_directory): + logger.error(f"Vocabulary path ({save_directory}) should be a directory") + return + out_vocab_file = os.path.join( + save_directory, + (filename_prefix + "-" if filename_prefix else "") + self.resource_files_names["vocab_file"], + ) + if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file): + copyfile(self.vocab_file, out_vocab_file) + elif not os.path.isfile(self.vocab_file): + with open(out_vocab_file, "wb") as fi: + content_spiece_model = self.sp_model.serialized_model_proto() + fi.write(content_spiece_model) + return (out_vocab_file,) + + def tokenize(self, text: TextInput, **kwargs) -> List[str]: + """ + Converts a string in a sequence of tokens, using the tokenizer. + + Split in words for word-based vocabulary or sub-words for sub-word-based vocabularies + (BPE/SentencePieces/WordPieces). Takes care of added tokens. + + Args: + text (`str`): + The sequence to be encoded. + **kwargs (additional keyword arguments): + Passed along to the model-specific `prepare_for_tokenization` preprocessing method. 
+ + Returns: + `List[str]`: The list of tokens. + """ + # Simple mapping string => AddedToken for special tokens with specific tokenization behaviors + # all_special_tokens_extended = dict( + # (str(t), t) + # for t in self.all_special_tokens_extended + # if isinstance(t, AddedToken) + # ) + + self.spec_init() + text, kwargs = self.prepare_for_tokenization(text, **kwargs) + + # TODO: should this be in the base class? + if hasattr(self, "do_lower_case") and self.do_lower_case: + # convert non-special tokens to lowercase + escaped_special_toks = [re.escape(s_tok) for s_tok in (self.unique_no_split_tokens + self.all_spec_tok)] + pattern = r"(" + r"|".join(escaped_special_toks) + r")|" + r"(.+?)" + text = re.sub(pattern, lambda m: m.groups()[0] or m.groups()[1].lower(), text) + + no_split_token = set(self.unique_no_split_tokens) + tokens = self.tokens_trie.split(text) + + # ["This is something", "", " else"] + # for i, token in enumerate(tokens): + # if token in no_split_token: + # tok_extended = all_special_tokens_extended.get(token, None) + # print(f'>>>{token}|{tok_extended}|{all_special_tokens_extended}<<<') + # left = tokens[i - 1] if i > 0 else None + # right = tokens[i + 1] if i < len(tokens) - 1 else None + # if isinstance(tok_extended, AddedToken): + # if tok_extended.rstrip and right: + # # A bit counter-intuitive but we strip the left of the string + # # since tok_extended.rstrip means the special token is eating all white spaces on its right + # tokens[i + 1] = right.lstrip() + # # Strip white spaces on the left + # if tok_extended.lstrip and left: + # tokens[i - 1] = left.rstrip() # Opposite here + # else: + # We strip left and right by default + # if right: + # tokens[i + 1] = right.lstrip() + # if left: + # tokens[i - 1] = left.rstrip() + # ["This is something", "", "else"] + tokenized_text = [] + for token in tokens: + # Need to skip eventual empty (fully stripped) tokens + if not token: + continue + if token in no_split_token: + tokenized_text.append(token) + else: + tokenized_text.extend(self._tokenize(token)) + # ["This", " is", " something", "", "else"] + return tokenized_text + + def _decode(self, *args, **kwargs): + """doc""" + kwargs.pop("clean_up_tokenization_spaces", None) + kwargs.pop("spaces_between_special_tokens", None) + return super()._decode( + *args, + **kwargs, + clean_up_tokenization_spaces=False, + spaces_between_special_tokens=False, + ) + + def _pad( + self, + encoded_inputs: Dict, + max_length: Optional[int] = None, + padding_strategy=PaddingStrategy.DO_NOT_PAD, + pad_to_multiple_of: Optional[int] = None, + return_attention_mask: Optional[bool] = None, + ) -> dict: + """doc""" + if return_attention_mask is None: + return_attention_mask = "attention_mask" in self.model_input_names + if return_attention_mask: + required_input = encoded_inputs[self.model_input_names[0]] + if padding_strategy == PaddingStrategy.LONGEST: + max_length = len(required_input) + if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0): + max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of + needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length + if "attention_mask" in encoded_inputs and encoded_inputs["attention_mask"] is not None: + attention_mask = encoded_inputs.pop("attention_mask") + if isinstance(attention_mask, paddle.Tensor): + attention_mask = attention_mask.numpy() + elif isinstance(attention_mask, list): + attention_mask = np.array(attention_mask) + 
elif not isinstance(attention_mask, np.ndarray): + raise ValueError(f"Unexpected type {type(attention_mask)} of attention_mask, ") + else: + attention_mask = np.tril( + np.ones( + (len(required_input), len(required_input)), + dtype=np.int64, + ) + ) + attention_mask = np.expand_dims(attention_mask, axis=0) + if needs_to_be_padded: + difference = max_length - len(required_input) + if self.padding_side == "right": + if attention_mask.ndim == 1: + pad_width = [(0, difference)] + else: + pad_width = [(0, 0), (0, difference), (0, difference)] + elif self.padding_side == "left": + if attention_mask.ndim == 1: + pad_width = [(difference, 0)] + else: + pad_width = [(0, 0), (difference, 0), (difference, 0)] + else: + raise ValueError("Invalid padding strategy:" + str(self.padding_side)) + attention_mask = np.pad( + attention_mask, + pad_width=pad_width, + mode="constant", + constant_values=0, + ) + encoded_inputs = super()._pad( + encoded_inputs, + max_length, + padding_strategy=padding_strategy, + pad_to_multiple_of=pad_to_multiple_of, + return_attention_mask=False, + ) + if return_attention_mask: + encoded_inputs["attention_mask"] = attention_mask.tolist() + return encoded_inputs + + +def add_special_tokens( + tokenizer, + special_tokens_info, + use_ocr_specialtoken=False, + use_crop_specialtoken=False, + special_token_ids_start=254208, + special_token_ids_end=256256, +): + """ + 增加 special token + + placeholder [<|IMAGE_PLACEHOLDER|>, <|AUDIO_PLACEHOLDER|>, <|VIDEO_PLACEHOLDER|>] 共3个 + + 模态起始截止 special tokens [<|BOI|> <|EOI|> <|BOA|> <|EOA|> <|BOV|> <|EOV|>] + + ocr special tokens [<|LOC_0|> <|LOC_1|> ... <|LOC_1000|>] 共1001个 + + crop special tokens [<|CROP_COL_SEP|>, <|CROP_ROW_SEP|>, <|CROP_IMAGE_SEP|>] 共3个 + <|CROP_COL_SEP|> for col 维度切 图片width(替换原明文逗号) + <|CROP_ROW_SEP|> for row 维度切 图片height(替换原明文回车) + <|CROP_IMAGE_SEP|> for 区分原图和crop图 图片width(替换原明文两个回车) + + 共2048个 unsed token + + Args: + tokenizer (ErnieTokenizer): tokenizer + special_token_ids_start (int, optional): special token 起点 ids. Defaults to 254208. + special_token_ids_end (int, optional): 词表最多支持大小. Defaults to 256256. + """ + special_tokens = [ + special_tokens_info["image_placeholder"], + special_tokens_info["audio_placeholder"], + ] + + if use_ocr_specialtoken: + special_tokens.extend(special_tokens_info["ocr_coor"]) + special_tokens.extend(special_tokens_info["ocr_begin_end"]) + + if use_crop_specialtoken: + special_tokens.extend(special_tokens_info["crop"]) + + # add special_tokens + additional_special_tokens = {"additional_special_tokens": special_tokens} + tokenizer.add_special_tokens(additional_special_tokens) + + # check + first_special_tokens = tokenizer.encode(special_tokens[0])["input_ids"] + + assert first_special_tokens[0] == special_token_ids_start, f"[ERROR] first_special_tokens={first_special_tokens}" + assert ( + len(tokenizer.get_vocab()) < special_token_ids_end + ), f"[ERROR] vocab_size = {len(tokenizer.get_vocab())} >= {special_token_ids_end} 增加过多special token了!" diff --git a/fastdeploy/input2/ernie_vl_processor.py b/fastdeploy/input2/ernie_vl_processor.py new file mode 100644 index 0000000000..63ae5bc310 --- /dev/null +++ b/fastdeploy/input2/ernie_vl_processor.py @@ -0,0 +1,285 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import numpy as np +from paddleformers.generation import GenerationConfig + +from fastdeploy.engine.request import Request +from fastdeploy.input.ernie_processor import ErnieProcessor +from fastdeploy.input.mm_processor import IDS_TYPE_FLAG, DataProcessor +from fastdeploy.utils import data_processor_logger + + +class ErnieMoEVLProcessor(ErnieProcessor): + """The processor class for ERNIE MoE VL models.""" + + def __init__( + self, + model_name_or_path, + limit_mm_per_prompt=None, + mm_processor_kwargs=None, + reasoning_parser_obj=None, + ): + data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") + tokenizer_path = model_name_or_path + preprocessor_path = model_name_or_path + processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) + + self.ernie_processor = DataProcessor( + tokenizer_name=tokenizer_path, + image_preprocessor_name=preprocessor_path, + **processor_kwargs, + ) + self.ernie_processor.eval() + self.image_patch_id = self.ernie_processor.image_patch_id + self.spatial_conv_size = self.ernie_processor.spatial_conv_size + + self.decode_status = dict() + self._load_tokenizer() + + # Generation config + try: + self.generation_config = GenerationConfig.from_pretrained(model_name_or_path) + except Exception as e: + data_processor_logger.warning( + f"Can't find generation config: {e}, so it will not use generation_config field in the model config" + ) + self.generation_config = None + + # self.eos_token_ids = [self.tokenizer.eos_token_id] + from paddleformers.trl.llm_utils import get_eos_token_id + + self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) + self.eos_token_id_len = len(self.eos_token_ids) + self.pad_token_id = self.get_pad_id() + self.limit_mm_per_prompt = self._parse_limits(limit_mm_per_prompt) + self.reasoning_parser = None + if reasoning_parser_obj: + self.reasoning_parser = reasoning_parser_obj(self.tokenizer) + + def get_pad_id(self): + """get pad id""" + return self.tokenizer.pad_token_id + + def _load_tokenizer(self): + """ + load tokenizer + + Returns: + tokenizer (AutoTokenizer) + """ + self.tokenizer = self.ernie_processor.tokenizer + + def _apply_default_parameters(self, request): + """ + Apply default value for parameters in request + """ + + def set_value(req, key, value): + value = getattr(self.generation_config, key, value) + if isinstance(req, dict): + if key not in req: + req[key] = value + else: + if req.get(key) is None: + req.set(key, value) + + set_value(request, "top_p", 0.7) + set_value(request, "temperature", 1.0) + set_value(request, "repetition_penalty", 1.0) + set_value(request, "frequency_penalty", 0.0) + set_value(request, "presence_penalty", 0.0) + return request + + def process_request(self, request, max_model_len=None, **kwargs): + """process the input data""" + task = request.to_dict() + task["enable_thinking"] = kwargs.get("enable_thinking", True) + self.process_request_dict(task, max_model_len) + request = Request.from_dict(task) + request = self._apply_default_parameters(request) + + return request + + def _parse_processor_kwargs(self, kwargs): + """解析多模态处理器参数配置""" + if 
not kwargs:
+            return {}
+
+        try:
+            if not isinstance(kwargs, dict):
+                raise ValueError("mm-processor-kwargs must be a dictionary")
+
+            # Validate parameter types
+            data_processor_logger.info(f"kwargs:{kwargs}")
+            expected_types = {
+                "spatial_conv_size": int,
+                "temporal_conv_size": int,
+                "image_min_pixels": int,
+                "image_max_pixels": int,
+                "video_min_pixels": int,
+                "video_max_pixels": int,
+                "video_target_frames": int,
+                "video_frames_sample": str,
+                "video_max_frames": int,
+                "video_min_frames": int,
+                "video_fps": int,
+            }
+
+            for key, value in kwargs.items():
+                if key in expected_types and not isinstance(value, expected_types[key]):
+                    raise ValueError(
+                        f"Invalid type for {key}: expected {expected_types[key].__name__}, got {type(value).__name__}"
+                    )
+
+            return kwargs
+
+        except Exception as e:
+            data_processor_logger.warning(f"Invalid mm-processor-kwargs format: {e}")
+            return {}
+
+    def _parse_limits(self, limits):
+        """Parse the per-prompt multimodal limit configuration"""
+        DEFAULT_LIMITS = {"image": 1, "video": 1, "audio": 1}
+
+        if not limits:
+            return DEFAULT_LIMITS
+
+        try:
+            if not isinstance(limits, dict):
+                raise ValueError("limit-mm-per-prompt must be a dictionary")
+            data_processor_logger.info(f"_parse_limits:{limits}")
+            return {**DEFAULT_LIMITS, **limits}
+        except Exception as e:
+            data_processor_logger.warning(f"Invalid limit-mm-per-prompt format: {e}, using default limits")
+            return DEFAULT_LIMITS
+
+    def _check_mm_limits(self, item):
+        if isinstance(item, dict):
+            # The request carries a prompt plus multi_modal_data
+            mm_data = item
+        else:
+            # The request carries messages
+            mm_data = {"image": [], "video": []}
+
+            for message in item:
+                if isinstance(message.get("content"), list):
+                    for part in message["content"]:
+                        if part.get("type") == "image":
+                            mm_data["image"].append(part)
+                        elif part.get("type") == "video":
+                            mm_data["video"].append(part)
+
+        for modality, data in mm_data.items():
+            if modality in self.limit_mm_per_prompt:
+                limit = self.limit_mm_per_prompt[modality]
+                if len(data) > limit:
+                    raise ValueError(f"Too many {modality} items in prompt, " f"got {len(data)} but limit is {limit}")
+
+    def process_request_dict(self, request, max_model_len=None):
+        """process the input data"""
+
+        request = self._apply_default_parameters(request)
+        if not request.get("eos_token_ids"):
+            request["eos_token_ids"] = self.eos_token_ids
+
+        stop_sequences = request.get("stop", [])
+        if stop_sequences:
+            stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)
+            request["stop_token_ids"] = stop_seqs
+            request["stop_seqs_len"] = stop_seqs_len
+
+        if request.get("prompt"):
+            multimodal_data = request.get("multimodal_data")
+            if multimodal_data is None:
+                multimodal_data = {}
+            self._check_mm_limits(multimodal_data)
+            images = multimodal_data.get("image", None)
+            videos = multimodal_data.get("video", None)
+            outputs = self.ernie_processor.text2ids(request["prompt"], images, videos)
+        elif request.get("messages"):
+            messages = request["messages"]
+            self._check_mm_limits(messages)
+            outputs = self.ernie_processor.request2ids(request)
+        else:
+            raise ValueError(f"Request must contain 'prompt' or 'messages': {request}")
+
+        metadata = request.get("metadata")
+        # If metadata carries previously generated tokens, append them to the end of input_ids
+        if metadata and metadata.get("generated_token_ids"):
+            self.append_generated_tokens(outputs, metadata["generated_token_ids"])
+        outputs = self.pack_outputs(outputs)
+        request["prompt_token_ids"] = outputs["input_ids"].tolist()
+        request["prompt_token_ids_len"] = len(request["prompt_token_ids"])
+        request["multimodal_inputs"] = outputs
+
+        # Truncate prompts that exceed the length limit
+        if 
max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len: + request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] + if request.get("max_tokens") is None: + request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) + data_processor_logger.info(f"Processed request {request}") + + return request + + def append_generated_tokens(self, multimodal_inputs, generated_token_ids): + "append already generated tokens" + + num_tokens = len(generated_token_ids) + multimodal_inputs["input_ids"].extend(generated_token_ids) + multimodal_inputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * num_tokens) + + start = multimodal_inputs["cur_position"] + for i in range(num_tokens): + multimodal_inputs["position_ids"].append([start + i] * 3) + multimodal_inputs["cur_position"] += num_tokens + + def pack_outputs(self, outs): + # Stack or nullify image-related fields + if not outs["images"]: + outs["images"] = None + outs["grid_thw"] = None + outs["image_type_ids"] = None + else: + outs["images"] = np.vstack(outs["images"]) + outs["grid_thw"] = np.vstack(outs["grid_thw"]) + outs["image_type_ids"] = np.array(outs["image_type_ids"]) + + outs["image_patch_id"] = self.image_patch_id + # Convert lists to arrays + outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) + outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) + outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64) + + return outs + + def process_response_dict(self, response_dict, stream, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + enable_thinking = kwargs.pop("enable_thinking", True) + if enable_thinking is None: + enable_thinking = True + if stream: + return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) + else: + return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) diff --git a/fastdeploy/input2/mm_processor/__init__.py b/fastdeploy/input2/mm_processor/__init__.py new file mode 100644 index 0000000000..ba59bc1654 --- /dev/null +++ b/fastdeploy/input2/mm_processor/__init__.py @@ -0,0 +1,23 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +from .process import IDS_TYPE_FLAG, DataProcessor, fancy_print + +__all__ = [ + "DataProcessor", + "fancy_print", + "IDS_TYPE_FLAG", +] diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/__init__.py b/fastdeploy/input2/mm_processor/image_preprocessor/__init__.py new file mode 100644 index 0000000000..c11444e675 --- /dev/null +++ b/fastdeploy/input2/mm_processor/image_preprocessor/__init__.py @@ -0,0 +1,20 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +from .get_image_preprocessor import get_image_preprocessor +from .image_preprocessor_adaptive import AdaptiveImageProcessor + +__all__ = ["get_image_preprocessor", "AdaptiveImageProcessor"] diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py b/fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py new file mode 100644 index 0000000000..0ff6f7d1ed --- /dev/null +++ b/fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py @@ -0,0 +1,34 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +"""get image preprocessor""" + +from fastdeploy.utils import data_processor_logger + +from .image_preprocessor_adaptive import AdaptiveImageProcessor + + +def get_image_preprocessor(args): + """ + get_image_preprocessor from args + """ + + if args.vision_model_name_or_path is None: + return None + + data_processor_logger.info("use AdaptiveImageProcessor") + image_preprocess = AdaptiveImageProcessor.from_pretrained(args.vision_model_name_or_path) + return image_preprocess diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py b/fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py new file mode 100644 index 0000000000..15b15a4d22 --- /dev/null +++ b/fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py @@ -0,0 +1,587 @@ +""" +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +"""image preprocessor adaptive""" + +import math +from typing import List, Optional, Union + +import numpy as np +import paddle +import PIL +from paddleformers.transformers.feature_extraction_utils import BatchFeature +from paddleformers.transformers.image_processing_utils import BaseImageProcessor +from paddleformers.transformers.image_transforms import ( + convert_to_rgb, + normalize, + rescale, + resize, + to_channel_dimension_format, +) +from paddleformers.transformers.image_utils import ( + ChannelDimension, + ImageInput, + PILImageResampling, + get_image_size, + infer_channel_dimension_format, + is_valid_image, + make_list_of_images, + to_numpy_array, + valid_images, +) +from paddleformers.transformers.tokenizer_utils_base import TensorType +from PIL import Image + +from fastdeploy.utils import data_processor_logger + +OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073] +OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711] + +IMAGE_FACTOR = 28 +MIN_PIXELS = 4 * 28 * 28 +MAX_PIXELS = 16384 * 28 * 28 +MAX_RATIO = 200 + + +VideoInput = Union[ + List["PIL.Image.Image"], + "np.ndarray", + "paddle.Tensor", + List["np.ndarray"], + List["paddle.Tensor"], + List[List["PIL.Image.Image"]], + List[List["np.ndarrray"]], + List[List["paddle.Tensor"]], +] + + +__all__ = [ + "AdaptiveImageProcessor", +] + + +def is_scaled_image(image: np.ndarray) -> bool: + """ + Checks to see whether the pixel values have already been rescaled to [0, 1]. + """ + if image.dtype == np.uint8: + return False + + # It's possible the image has pixel values in [0, 255] but is of floating type + return np.min(image) >= 0 and np.max(image) <= 1 + + +def make_batched_images(images) -> List[List[ImageInput]]: + """ + Accepts images in list or nested list format, and makes a list of images for preprocessing. + + Args: + images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`): + The input image. + + Returns: + list: A list of images. + """ + if isinstance(images, (list, tuple)) and isinstance(images[0], (list, tuple)) and is_valid_image(images[0][0]): + return [img for img_list in images for img in img_list] + + elif isinstance(images, (list, tuple)) and is_valid_image(images[0]): + return images + + elif is_valid_image(images): + return [images] + + raise ValueError(f"Could not make batched images from {images}") + + +# Copied from transformers.models.llava_next_video.image_processing_llava_next_video.make_batched_videos +def make_batched_videos(videos) -> List[VideoInput]: + """dummy""" + if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]): + return videos + + elif isinstance(videos, (list, tuple)) and is_valid_image(videos[0]): + if isinstance(videos[0], Image.Image): + return [videos] + elif len(videos[0].shape) == 4: + return [list(video) for video in videos] + + elif is_valid_image(videos) and len(videos.shape) == 4: + return [list(videos)] + + raise ValueError(f"Could not make batched video from {videos}") + + +class AdaptiveImageProcessor(BaseImageProcessor): + r""" + Constructs a adaptive image processor that dynamically resizes images based on the original images. + + Args: + do_resize (`bool`, *optional*, defaults to `True`): + Whether to resize the image's (height, width) dimensions. + resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): + Resampling filter to use when resizing the image. 
+ do_rescale (`bool`, *optional*, defaults to `True`): + Whether to rescale the image by the specified scale `rescale_factor`. + rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): + Scale factor to use if rescaling the image. + do_normalize (`bool`, *optional*, defaults to `True`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`): + Mean to use if normalizing the image. This is a float or list of floats for each channel in the image. + image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`): + Standard deviation to use if normalizing the image. This is a float or list of floats for each channel + in the image. + do_convert_rgb (`bool`, *optional*, defaults to `True`): + Whether to convert the image to RGB. + min_pixels (`int`, *optional*, defaults to `56 * 56`): + The min pixels of the image to resize the image. + max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`): + The max pixels of the image to resize the image. + patch_size (`int`, *optional*, defaults to 14): + The spacial patch size of the vision encoder. + temporal_conv_size (`int`, *optional*, defaults to 2): + The temporal conv size in resampler. + merge_size (`int`, *optional*, defaults to 2): + The merge size of the vision encoder to llm encoder. + """ + + model_input_names = [ + "pixel_values", + "image_grid_thw", + "pixel_values_videos", + "video_grid_thw", + ] + + def __init__( + self, + do_resize: bool = True, + resample: PILImageResampling = PILImageResampling.BICUBIC, + do_rescale: bool = True, + rescale_factor: float = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = True, + min_pixels: int = 56 * 56, + max_pixels: int = 28 * 28 * 1280, + patch_size: int = 14, + temporal_conv_size: int = 2, + merge_size: int = 2, + **kwargs, + ) -> None: + """init""" + super().__init__(**kwargs) + self.do_resize = do_resize + self.resample = resample + self.do_rescale = do_rescale + self.rescale_factor = rescale_factor + self.do_normalize = do_normalize + self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN + self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD + self.min_pixels = min_pixels + self.max_pixels = max_pixels + self.patch_size = patch_size + self.temporal_conv_size = temporal_conv_size + self.merge_size = merge_size + self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels} + self.do_convert_rgb = do_convert_rgb + + def set_pixels(self, min_pixels=None, max_pixels=None, msg=""): + """设定pixels""" + if min_pixels is not None: + assert isinstance(min_pixels, int) and min_pixels >= 0, "min_pixels must be positive int" + data_processor_logger.info(f"{msg} AdaptiveImageProcessor set min_pixels = {min_pixels}") + self.min_pixels = min_pixels + self.size["min_pixels"] = int(min_pixels) + if max_pixels is not None: + assert isinstance(max_pixels, int) and max_pixels > 0, "max_pixels must be positive int" + data_processor_logger.info(f"{msg} AdaptiveImageProcessor set max_pixels = {max_pixels}") + self.max_pixels = max_pixels + self.size["max_pixels"] = int(max_pixels) + + def get_smarted_resize(self, height, width, min_pixels=None, max_pixels=None): + """dummy""" + actual_min_pixels = min_pixels if min_pixels is not None else self.min_pixels + actual_max_pixels = max_pixels if max_pixels is not None 
else self.max_pixels + resized_height, resized_width = smart_resize( + height, + width, + factor=self.patch_size * self.merge_size, + min_pixels=actual_min_pixels, + max_pixels=actual_max_pixels, + ) + return (resized_height, resized_width), ( + resized_height // self.patch_size, + resized_width // self.patch_size, + ) + + def _preprocess( + self, + images: Union[ImageInput, VideoInput], + do_resize: bool = True, + resample: PILImageResampling = None, + do_rescale: bool = True, + rescale_factor: float = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = False, + data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + predetermined_grid_thw=None, + ): + """ + Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`. + + Args: + images (`ImageInput`): + Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. + If pixel values range from 0 to 1, set `do_rescale=False`. + vision_info (`List[Dict]`, *optional*): + Optional list of dictionaries containing additional information about vision inputs. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. + resample (`PILImageResampling`, *optional*, defaults to `self.resample`): + Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image. + rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): + Scale factor to use if rescaling the image. + do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Mean to use if normalizing the image. + Can be a float or a list of floats corresponding to the number of channels in the image. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Standard deviation to use if normalizing the image. + Can be a float or a list of floats corresponding to the number of channels in the image. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. + data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + """ + images = make_list_of_images(images) + + if do_convert_rgb: + images = [convert_to_rgb(image) for image in images] + + # All transformations expect numpy arrays. 
+ images = [to_numpy_array(image) for image in images] + + if is_scaled_image(images[0]) and do_rescale: + data_processor_logger.warning( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: + # We assume that all images have the same channel dimension format. + input_data_format = infer_channel_dimension_format(images[0]) + + height, width = get_image_size(images[0], channel_dim=input_data_format) + resized_height, resized_width = height, width + processed_images = [] + + if predetermined_grid_thw is not None: + assert len(predetermined_grid_thw) == len( + images + ), f"len(predetermined_grid_thw) {len(predetermined_grid_thw)} == len(images) {len(images)}" + + for img_idx, image in enumerate(images): + if do_resize: + if predetermined_grid_thw is not None: + (resized_height, resized_width) = predetermined_grid_thw[img_idx] + resized_height *= self.patch_size + resized_width *= self.patch_size + else: + resized_height, resized_width = smart_resize( + height, + width, + factor=self.patch_size * self.merge_size, + min_pixels=self.min_pixels, + max_pixels=self.max_pixels, + ) + image = image.astype("uint8") # TODO : 需要手动加上,否则多除255 导致结果会出错 + # 直接fromarray,不要靠paddleformers里面的 + image = Image.fromarray(image) + image = resize( + image, + size=(resized_height, resized_width), + resample=resample, + data_format=input_data_format, + ) + if do_rescale: + image = rescale(image, scale=rescale_factor, data_format=input_data_format) + + if do_normalize: + image = normalize( + image=image, + mean=image_mean, + std=image_std, + data_format=input_data_format, + ) + + image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) # [C, H, W] + + processed_images.append(image) + patches = np.array(processed_images) + if data_format == ChannelDimension.LAST: + patches = patches.transpose([0, 3, 1, 2]) + + channel = patches.shape[1] # [time, C, H, W] + grid_t = patches.shape[0] + grid_h, grid_w = ( + resized_height // self.patch_size, + resized_width // self.patch_size, + ) + patches = patches.reshape( + [ + grid_t, + channel, + grid_h // self.merge_size, + self.merge_size, + self.patch_size, + grid_w // self.merge_size, + self.merge_size, + self.patch_size, + ] + ) + # [grid_t, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, psz, psz] + patches = patches.transpose([0, 2, 5, 3, 6, 1, 4, 7]) + + flatten_patches = patches.reshape( + [ + grid_t * grid_h * grid_w, + channel * self.patch_size * self.patch_size, + ] + ) # [grid_t * grid_h * grid_w, C * psz * psz] + + return flatten_patches, (grid_t, grid_h, grid_w) + + def preprocess( + self, + images: ImageInput, + videos: VideoInput = None, + do_resize: bool = True, + size: Optional[Union[int, List[int]]] = None, + resample: PILImageResampling = None, + do_rescale: bool = True, + rescale_factor: float = 1 / 255, + do_normalize: bool = True, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_convert_rgb: bool = False, + return_tensors: Optional[Union[str, TensorType]] = None, + data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, + input_data_format: Optional[Union[str, ChannelDimension]] = None, + predetermined_grid_thw=None, + ): + """ + Args: + images (`ImageInput`): + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. 
If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. + videos (`VideoInput`): + Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If + passing in videos with pixel values between 0 and 1, set `do_rescale=False`. + do_resize (`bool`, *optional*, defaults to `self.do_resize`): + Whether to resize the image. + size (`Dict[str, int]`, *optional*, defaults to `self.size`): + Size of the image after resizing. Shortest edge of the image is resized to size["shortest_edge"], with + the longest edge resized to keep the input aspect ratio. + resample (`int`, *optional*, defaults to `self.resample`): + Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only + has an effect if `do_resize` is set to `True`. + do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): + Whether to rescale the image. + rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): + Rescale factor to rescale the image by if `do_rescale` is set to `True`. + do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): + Whether to normalize the image. + image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): + Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`. + image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): + Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to + `True`. + do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): + Whether to convert the image to RGB. + return_tensors (`str` or `TensorType`, *optional*): + The type of tensors to return. Can be one of: + - Unset: Return a list of `np.ndarray`. + - `TensorType.PADDLE` or `'pt'`: Return a batch of type `torch.Tensor`. + - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`. + data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`): + The channel dimension format for the output image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - Unset: Use the channel dimension format of the input image. + input_data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the input image. If unset, the channel dimension format is inferred + from the input image. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. 
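+
+        Returns:
+            `BatchFeature`: carries `pixel_values` and `image_grid_thw` when `images` is given; when
+            `videos` is given, `pixel_values_videos` and `video_grid_thw` are returned instead.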
+ + """ + do_resize = do_resize if do_resize is not None else self.do_resize + size = size if size is not None else self.size + resample = resample if resample is not None else self.resample + do_rescale = do_rescale if do_rescale is not None else self.do_rescale + rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor + do_normalize = do_normalize if do_normalize is not None else self.do_normalize + image_mean = image_mean if image_mean is not None else self.image_mean + image_std = image_std if image_std is not None else self.image_std + do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb + + if images is not None: + images = make_batched_images(images) + if videos is not None: + videos = make_batched_videos(videos) + + if images is not None and not valid_images(images): + raise ValueError("Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "paddle.Tensor.") + + if images is not None: + pixel_values, vision_grid_thws = [], [] + for img_idx, image in enumerate(images): + if predetermined_grid_thw is not None: + predetermined_grid_thw_one = [predetermined_grid_thw[img_idx]] + else: + predetermined_grid_thw_one = None + patches, image_grid_thw = self._preprocess( + image, + do_resize=do_resize, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + do_convert_rgb=do_convert_rgb, + input_data_format=input_data_format, + predetermined_grid_thw=predetermined_grid_thw_one, + ) + pixel_values.extend(patches) + vision_grid_thws.append(image_grid_thw) + pixel_values = np.array(pixel_values) + vision_grid_thws = np.array(vision_grid_thws) + data = { + "pixel_values": pixel_values, + "image_grid_thw": vision_grid_thws, + } + + if videos is not None: + pixel_values, vision_grid_thws = [], [] + for images in videos: + patches, video_grid_thw = self._preprocess( + images, + do_resize=do_resize, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + do_convert_rgb=do_convert_rgb, + input_data_format=input_data_format, + predetermined_grid_thw=predetermined_grid_thw, + ) + pixel_values.extend(patches) + vision_grid_thws.append(video_grid_thw) + pixel_values = np.array(pixel_values) + vision_grid_thws = np.array(vision_grid_thws) + + data = { + "pixel_values_videos": pixel_values, + "video_grid_thw": vision_grid_thws, + } + + return BatchFeature(data=data, tensor_type=return_tensors) + + +def round_by_factor(number: int, factor: int) -> int: + """Returns the closest integer to 'number' that is divisible by 'factor'.""" + return round(number / factor) * factor + + +def ceil_by_factor(number: int, factor: int) -> int: + """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" + return math.ceil(number / factor) * factor + + +def floor_by_factor(number: int, factor: int) -> int: + """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" + return math.floor(number / factor) * factor + + +def smart_resize( + height: int, + width: int, + factor: int = IMAGE_FACTOR, + min_pixels: int = MIN_PIXELS, + max_pixels: int = MAX_PIXELS, +): + """ + Rescales the image so that the following conditions are met: + + 1. Both dimensions (height and width) are divisible by 'factor'. + + 2. 
The total number of pixels is within the range ['min_pixels', 'max_pixels']. + + 3. The aspect ratio of the image is maintained as closely as possible. + """ + if max(height, width) / min(height, width) > MAX_RATIO: + if height > width: + new_width = max(factor, round_by_factor(width, factor)) + new_height = floor_by_factor(new_width * MAX_RATIO, factor) + else: + new_height = max(factor, round_by_factor(height, factor)) + new_width = floor_by_factor(new_height * MAX_RATIO, factor) + + data_processor_logger.info( + f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)},\ + resize to {max(new_height, new_width) / min(new_height, new_width)}" + ) + + height = new_height + width = new_width + + h_bar = max(factor, round_by_factor(height, factor)) + w_bar = max(factor, round_by_factor(width, factor)) + if h_bar * w_bar > max_pixels: + beta = math.sqrt((height * width) / max_pixels) + h_bar = floor_by_factor(height / beta, factor) + w_bar = floor_by_factor(width / beta, factor) + elif h_bar * w_bar < min_pixels: + beta = math.sqrt(min_pixels / (height * width)) + h_bar = ceil_by_factor(height * beta, factor) + w_bar = ceil_by_factor(width * beta, factor) + + if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels: + raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}") + + return h_bar, w_bar diff --git a/fastdeploy/input2/mm_processor/process.py b/fastdeploy/input2/mm_processor/process.py new file mode 100644 index 0000000000..ea2559a0fe --- /dev/null +++ b/fastdeploy/input2/mm_processor/process.py @@ -0,0 +1,512 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +""" process.py """ +import copy +import os +from collections import defaultdict +from typing import Any, Dict, List, Union + +import numpy as np +from paddleformers.transformers.image_utils import ChannelDimension +from PIL import Image + +from fastdeploy.entrypoints.chat_utils import parse_chat_messages +from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer +from fastdeploy.utils import data_processor_logger + +from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor +from .process_video import read_frames_decord, read_video_decord +from .utils.render_timestamp import render_frame_timestamp + +IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} + + +def fancy_print(input_ids, tokenizer, image_patch_id=None): + """ + input_ids: input_ids + tokenizer: the tokenizer of models + """ + i = 0 + res = "" + text_ids = [] + real_image_token_len = 0 + while i < len(input_ids): + if input_ids[i] == image_patch_id: + if len(text_ids) > 0: + res += tokenizer.decode(text_ids) + text_ids = [] + + real_image_token_len += 1 + else: + if real_image_token_len != 0: + res += f"<|IMAGE@{real_image_token_len}|>" + real_image_token_len = 0 + + text_ids.append(input_ids[i]) + + i += 1 + if len(text_ids) > 0: + + res += tokenizer.decode(text_ids) + text_ids = [] + return res + + +class DataProcessor: + """ + Processes multimodal chat messages into model-ready inputs, + handling text, images, and videos with 3D positional embeddings. + """ + + CLS_TOKEN = "<|begin_of_sentence|>" + SEP_TOKEN = "<|end_of_sentence|>" + EOS_TOKEN = "" + IMG_START = "<|IMAGE_START|>" + IMG_END = "<|IMAGE_END|>" + VID_START = "<|VIDEO_START|>" + VID_END = "<|VIDEO_END|>" + + def __init__( + self, + tokenizer_name: str, + image_preprocessor_name: str, + spatial_conv_size: int = 2, + temporal_conv_size: int = 2, + image_min_pixels: int = 4 * 28 * 28, + image_max_pixels: int = 6177 * 28 * 28, + video_min_pixels: int = 299 * 28 * 28, + video_max_pixels: int = 1196 * 28 * 28, + video_target_frames: int = -1, + video_frames_sample: str = "leading", + video_max_frames: int = 180, + video_min_frames: int = 16, + video_fps: int = 2, + **kwargs, + ) -> None: + # Tokenizer and image preprocessor + self.model_name_or_path = tokenizer_name + self._load_tokenizer() + self.tokenizer.ignored_index = -100 + self.image_preprocessor = AdaptiveImageProcessor.from_pretrained(image_preprocessor_name) + + # Convolution sizes for patch aggregation + self.spatial_conv_size = spatial_conv_size + self.temporal_conv_size = temporal_conv_size + + # Pixel constraints + self.image_min_pixels = image_min_pixels + self.image_max_pixels = image_max_pixels + self.video_min_pixels = video_min_pixels + self.video_max_pixels = video_max_pixels + + # Video sampling parameters + self.target_frames = video_target_frames + self.frames_sample = video_frames_sample + self.max_frames = video_max_frames + self.min_frames = video_min_frames + self.fps = video_fps + + # Special tokens and IDs + self.cls_token = self.CLS_TOKEN + self.sep_token = self.SEP_TOKEN + self.eos_token = self.EOS_TOKEN + self.image_start = self.IMG_START + self.image_end = self.IMG_END + self.video_start = self.VID_START + self.video_end = self.VID_END + self.image_patch_id = self.tokenizer.convert_tokens_to_ids("<|IMAGE_PLACEHOLDER|>") + self.image_start_id = self.tokenizer.convert_tokens_to_ids(self.image_start) + self.video_start_id = self.tokenizer.convert_tokens_to_ids(self.video_start) + self.sep_token_id = 
self.tokenizer.convert_tokens_to_ids(self.sep_token) + self.eos_token_id = self.tokenizer.convert_tokens_to_ids(self.eos_token) + + self.token_type_mapping = self._build_token_type_mapping() + self.is_training = True + self.role_prefixes = { + "system": "", + "user": "User: ", + "bot": "Assistant: ", + "assistant": "Assistant: ", + } + + def _build_token_type_mapping(self) -> Dict[Any, int]: + mapping = defaultdict(lambda: IDS_TYPE_FLAG["text"]) + for token in ( + self.IMG_START, + self.IMG_END, + self.VID_START, + self.VID_END, + ): + mapping[token] = IDS_TYPE_FLAG["image"] + mapping[self.image_patch_id] = IDS_TYPE_FLAG["image"] + return mapping + + def train(self) -> None: + """Enable training mode (produces labels).""" + self.is_training = True + + def eval(self) -> None: + """Enable evaluation mode (doesn't produce labels).""" + self.is_training = False + + def text2ids(self, text, images=None, videos=None): + """ + Convert chat text into model inputs. + Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. + """ + + outputs = { + "input_ids": [], + "token_type_ids": [], + "position_ids": [], + "images": [], + "grid_thw": [], + "image_type_ids": [], + "labels": [], + "cur_position": 0, + "pic_cnt": 0, + "video_cnt": 0, + } + + IMAGE_PLACEHOLDER = "<|image@placeholder|>" + VIDEO_PLACEHOLDER = "<|video@placeholder|>" + IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER) + VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER) + st, image_idx, video_idx = 0, 0, 0 + while st < len(text): + image_pos = text.find(IMAGE_PLACEHOLDER, st) + image_pos = len(text) if image_pos == -1 else image_pos + video_pos = text.find(VIDEO_PLACEHOLDER, st) + video_pos = len(text) if video_pos == -1 else video_pos + ed = min(image_pos, video_pos) + + self._add_text(text[st:ed], outputs) + if ed == len(text): + break + + if ed == image_pos: + self._add_image(images[image_idx], outputs) + image_idx += 1 + st = ed + IMAGE_PLACEHOLDER_LEN + else: + item = videos[video_idx] + if isinstance(item, dict): + frames = self._load_and_process_video(item["video"], item) + else: + frames = self._load_and_process_video(item, {}) + + self._add_video(frames, outputs) + video_idx += 1 + st = ed + VIDEO_PLACEHOLDER_LEN + + return outputs + + def request2ids( + self, request: Dict[str, Any], tgts: List[str] = None + ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]: + """ + Convert chat messages into model inputs. + Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. 
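+        In training mode `tgts` must be provided and is used to fill `labels`; in eval mode `labels` is left empty.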
+ """ + + outputs = { + "input_ids": [], + "token_type_ids": [], + "position_ids": [], + "images": [], + "grid_thw": [], + "image_type_ids": [], + "labels": [], + "cur_position": 0, + "pic_cnt": 0, + "video_cnt": 0, + } + + messages = parse_chat_messages(request.get("messages")) + image_message_list = [] + for msg in messages: + role = msg.get("role") + assert role in self.role_prefixes, f"Unsupported role: {role}" + content_items = msg.get("content") + if not isinstance(content_items, list): + content_items = [content_items] + for item in content_items: + if isinstance(item, dict) and item.get("type") in [ + "image", + "video", + ]: + image_message_list.append(item) + + prompt_token_ids = self.apply_chat_template(request) + if len(prompt_token_ids) == 0: + raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + image_start_index = 0 + image_message_index = 0 + for i in range(len(prompt_token_ids)): + if prompt_token_ids[i] in [ + self.image_start_id, + self.video_start_id, + ]: + self._add_text(prompt_token_ids[image_start_index : i + 1], outputs) + image_start_index = i + 1 + image_message = image_message_list[image_message_index] + if image_message["type"] == "image": + img = image_message.get("image") + if img is None: + continue + outputs["pic_cnt"] += 1 + self._add_image(img, outputs) + elif image_message["type"] == "video": + video_bytes = image_message.get("video") + if video_bytes is None: + continue + frames = self._load_and_process_video(video_bytes, image_message) + outputs["video_cnt"] += 1 + self._add_video(frames, outputs) + image_message_index += 1 + self._add_text(prompt_token_ids[image_start_index:], outputs) + + if self.is_training: + assert tgts, "training must give tgt !" + self._extract_labels(outputs, tgts) + return outputs + + def _add_special_token(self, token: Union[str, int], outputs: Dict) -> None: + token_id = token if isinstance(token, int) else self.tokenizer.convert_tokens_to_ids(token) + outputs["input_ids"].append(token_id) + outputs["token_type_ids"].append(self.token_type_mapping[token]) + pos = outputs["cur_position"] + outputs["position_ids"].append([pos] * 3) + outputs["cur_position"] += 1 + + def _add_text(self, tokens, outputs: Dict) -> None: + if isinstance(tokens, str): + tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] + outputs["input_ids"].extend(tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) + + start = outputs["cur_position"] + for i in range(len(tokens)): + outputs["position_ids"].append([start + i] * 3) + outputs["cur_position"] += len(tokens) + + def _add_image(self, img, outputs: Dict) -> None: + patches_h, patches_w = self.image_preprocessor.get_smarted_resize( + img.height, + img.width, + min_pixels=self.image_min_pixels, + max_pixels=self.image_max_pixels, + )[1] + num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2) + + outputs["input_ids"].extend([self.image_patch_id] * num_tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) + + pos_ids = self._compute_3d_positions(1, patches_h, patches_w, outputs["cur_position"]) + outputs["position_ids"].extend(pos_ids) + outputs["cur_position"] = np.max(pos_ids) + 1 + + # Preprocess pixels + ret = self.image_preprocessor.preprocess( + images=[img.convert("RGB")], + do_normalize=False, + do_rescale=False, + predetermined_grid_thw=np.array([[patches_h, patches_w]]), + do_convert_rgb=True, + input_data_format=ChannelDimension.LAST, + ) + 
outputs["images"].append(ret["pixel_values"]) + outputs["grid_thw"].append(ret["image_grid_thw"]) + outputs["image_type_ids"].append(0) + + def _add_video(self, frames, outputs: Dict) -> None: + patches_h, patches_w = self.image_preprocessor.get_smarted_resize( + frames[0].height, + frames[0].width, + min_pixels=self.video_min_pixels, + max_pixels=self.video_max_pixels, + )[1] + num_frames = len(frames) + num_tokens = (num_frames * patches_h * patches_w) // (self.spatial_conv_size**2 * self.temporal_conv_size) + + pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + ret = self.image_preprocessor.preprocess( + images=None, + videos=pixel_stack, + do_normalize=False, + do_rescale=False, + predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), + do_convert_rgb=True, + input_data_format=ChannelDimension.LAST, + ) + outputs["images"].append(ret["pixel_values_videos"]) + outputs["grid_thw"].append(ret["video_grid_thw"]) + outputs["image_type_ids"].extend([1] * num_frames) + + outputs["input_ids"].extend([self.image_patch_id] * num_tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) + + pos_ids = self._compute_3d_positions(num_frames, patches_h, patches_w, outputs["cur_position"]) + outputs["position_ids"].extend(pos_ids) + outputs["cur_position"] = np.max(pos_ids) + 1 + + def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None: + input_ids = copy.deepcopy(outputs["input_ids"]) + labels = [self.tokenizer.ignored_index] * len(input_ids) + + tgt_count = input_ids.count(self.sep_token_id) + assert tgt_count == len(tgts), f"len(tgts) != len(src) {len(tgts)} vs {tgt_count}" + + tgt_index = 0 + for i, token_id in enumerate(input_ids): + if token_id == self.sep_token_id: + labels_token = self.tokenizer.tokenize(tgts[tgt_index]) + labels_token_id = self.tokenizer.convert_tokens_to_ids(labels_token) + labels[i - len(labels_token_id) : i] = labels_token_id + labels[i] = self.eos_token_id # + tgt_index += 1 + + outputs["labels"] = labels + + def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: + reader, meta, path = read_video_decord(url, save_to_disk=False) + + video_frame_args = dict() + video_frame_args["fps"] = item.get("fps", self.fps) + video_frame_args["min_frames"] = item.get("min_frames", self.min_frames) + video_frame_args["max_frames"] = item.get("max_frames", self.max_frames) + video_frame_args["target_frames"] = item.get("target_frames", self.target_frames) + video_frame_args["frames_sample"] = item.get("frames_sample", self.frames_sample) + + video_frame_args = self._set_video_frame_args(video_frame_args, meta) + + frames_data, _, timestamps = read_frames_decord( + path, + reader, + meta, + target_frames=video_frame_args["target_frames"], + target_fps=video_frame_args["fps"], + frames_sample=video_frame_args["frames_sample"], + save_to_disk=False, + ) + + frames: List[Image.Image] = [] + for img_array, ts in zip(frames_data, timestamps): + frames.append(render_frame_timestamp(img_array, ts)) + # Ensure even number of frames for temporal conv + if len(frames) % 2 != 0: + frames.append(copy.deepcopy(frames[-1])) + return frames + + def _set_video_frame_args(self, video_frame_args, video_meta): + """ + 根据已知参数和优先级,设定最终的抽帧参数 + """ + # 优先级:video_target_frames > (video_min_frames, video_max_frames) > video_fps + if video_frame_args["target_frames"] > 0: + if video_frame_args["fps"] >= 0: + raise ValueError("fps must be negative if target_frames is given") + if ( + video_frame_args["min_frames"] 
> 0 + and video_frame_args["target_frames"] < video_frame_args["min_frames"] + ): + raise ValueError("target_frames must be larger than min_frames") + if ( + video_frame_args["max_frames"] > 0 + and video_frame_args["target_frames"] > video_frame_args["max_frames"] + ): + raise ValueError("target_frames must be smaller than max_frames") + else: + if video_frame_args["fps"] < 0: + raise ValueError("Must provide either positive target_fps or positive target_frames.") + # 先计算在video_fps下抽到的帧数 + frames_to_extract = int(video_meta["duration"] * video_frame_args["fps"]) + # 判断是否在目标区间内,如果不是,则取target_frames为上界或下界 + if ( + video_frame_args["min_frames"] > 0 + and video_frame_args["max_frames"] > 0 + and video_frame_args["min_frames"] > video_frame_args["max_frames"] + ): + raise ValueError("min_frames must be smaller than max_frames") + if video_frame_args["min_frames"] > 0 and frames_to_extract < video_frame_args["min_frames"]: + video_frame_args["target_frames"] = video_frame_args["min_frames"] + video_frame_args["fps"] = -1 + if video_frame_args["max_frames"] > 0 and frames_to_extract > video_frame_args["max_frames"]: + video_frame_args["target_frames"] = video_frame_args["max_frames"] + video_frame_args["fps"] = -1 + + return video_frame_args + + def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> List[List[int]]: + # Downsample time if needed + t_eff = t // self.temporal_conv_size if t != 1 else 1 + gh, gw = h // self.spatial_conv_size, w // self.spatial_conv_size + time_idx = np.repeat(np.arange(t_eff), gh * gw) + h_idx = np.tile(np.repeat(np.arange(gh), gw), t_eff) + w_idx = np.tile(np.arange(gw), t_eff * gh) + + coords = list(zip(time_idx, h_idx, w_idx)) + return [[start_idx + ti, start_idx + hi, start_idx + wi] for ti, hi, wi in coords] + + def _load_tokenizer(self): + """ + load tokenizer + + Returns: + tokenizer (AutoTokenizer) + """ + vocab_file_names = [ + "tokenizer.model", + "spm.model", + "ernie_token_100k.model", + ] + for i in range(len(vocab_file_names)): + if os.path.exists(os.path.join(self.model_name_or_path, vocab_file_names[i])): + ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] + break + self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) + + def apply_chat_template(self, request): + """ + Convert multi-turn messages into ID sequences. + + Args: + messages: Either a request dict containing 'messages' field, + or a list of message dicts directly + + Returns: + List of token IDs as strings (converted from token objects) + """ + if self.tokenizer.chat_template is None: + raise ValueError("This model does not support chat_template.") + + prompt_token_str = ( + self.tokenizer.apply_chat_template( + request, + tokenize=False, + add_generation_prompt=request.get("add_generation_prompt", True), + ) + .replace("<|image@placeholder|>", "") + .replace("<|video@placeholder|>", "") + ) + tokens = self.tokenizer.tokenize(prompt_token_str) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + data_processor_logger.info( + f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" + ) + return token_ids diff --git a/fastdeploy/input2/mm_processor/process_video.py b/fastdeploy/input2/mm_processor/process_video.py new file mode 100644 index 0000000000..91120096c7 --- /dev/null +++ b/fastdeploy/input2/mm_processor/process_video.py @@ -0,0 +1,205 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import io +import os +import random + +import numpy as np +from PIL import Image + +from fastdeploy.utils import data_processor_logger + +from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename +from .utils.video_utils import VideoReaderWrapper + + +def read_video_decord(video_path, save_to_disk): + """get reader and meta by decord""" + # video_path = get_downloadable(video_path, save_to_disk=save_to_disk) + if isinstance(video_path, VideoReaderWrapper): + video_reader = video_path + else: + if isinstance(video_path, bytes): + video_path = io.BytesIO(video_path) + video_reader = VideoReaderWrapper(video_path, num_threads=1) + vlen = len(video_reader) + fps = video_reader.get_avg_fps() + duration = vlen / float(fps) + + video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} + + return video_reader, video_meta, video_path + + +def get_frame_indices( + vlen, + target_frames=-1, + target_fps=-1, + frames_sample="middle", + fix_start=None, + input_fps=-1, +): + """ + 取出对应的frame index + """ + assert frames_sample in ["rand", "middle", "leading"] + if target_frames > 0: + assert target_fps <= 0, "target_fps must be negative if target_frames is given." + if target_frames > vlen: + acc_samples = vlen + data_processor_logger.info( + f"target_frames={target_frames} is larger than video length {vlen}, " + f"will sample {acc_samples} frames." + ) + else: + acc_samples = target_frames + data_processor_logger.debug(f"sampling at target_frames={target_frames}, frames_sample={frames_sample}") + + # split the video into `acc_samples` intervals, and sample from each interval. + intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int) + ranges = [] + for idx, interv in enumerate(intervals[:-1]): + ranges.append((interv, intervals[idx + 1] - 1)) + if frames_sample == "rand": + try: + frame_indices = [random.choice(range(x[0], x[1])) for x in ranges] + except Exception: + frame_indices = np.random.permutation(vlen)[:acc_samples] + frame_indices.sort() + frame_indices = list(frame_indices) + elif fix_start is not None: + frame_indices = [x[0] + fix_start for x in ranges] + elif frames_sample == "leading": + frame_indices = [x[0] for x in ranges] + elif frames_sample == "middle": + frame_indices = [(x[0] + x[1]) // 2 for x in ranges] + else: + raise NotImplementedError + + elif target_fps > 0: + assert target_frames <= 0, "target_frames must be negative if target_fps is given." + assert input_fps > 0, "input_fps must be provided if target_fps is given." 
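+        # Each sampled frame stands for a clip of delta = 1 / target_fps seconds: "middle"
+        # takes each clip's midpoint, "leading" its start, and "rand" jitters the midpoint by up
+        # to half a clip, before the chosen timestamps are mapped back to frame indices.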
+ data_processor_logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}") + duration = float(vlen) / input_fps + delta = 1 / target_fps # gap between frames, this is also the clip length each frame represents + if frames_sample == "middle": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + elif frames_sample == "leading": + frame_seconds = np.arange(0, duration, delta) + if frames_sample == "rand": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5 + frame_seconds += rand_offset * delta + frame_indices = np.around(frame_seconds * input_fps).astype(int) + frame_indices = [e for e in frame_indices if e < vlen] + + else: + raise ValueError("Must provide either positive target_fps or positive target_frames.") + + return frame_indices + + +def read_frames_decord( + video_path, + video_reader, + video_meta, + target_frames=-1, + target_fps=-1, + frames_sample="middle", + fix_start=None, + save_to_disk=False, + cache_dir=EXTRACTED_FRAME_DIR, + frame_indices=None, + tol=10, +): + """get frames by decord""" + + if frame_indices is None: + frame_indices = get_frame_indices( + video_meta["num_of_frame"], + target_frames=target_frames, + target_fps=target_fps, + frames_sample=frames_sample, + fix_start=fix_start, + input_fps=video_meta["fps"], + ) + + frames = [] + for frame_indice_index in range(0, len(frame_indices)): + frame_indice = frame_indices[frame_indice_index] + try: + frames.append(video_reader[frame_indice].asnumpy()) # (T, H, W, C) + except Exception as e: + data_processor_logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}") + previous_counter = 1 + later_counter = 1 + previous_after_flag = True + if frame_indice == 0 or frame_indice == len(video_reader) - 1: + cur_tol = tol * 2 + else: + cur_tol = tol + while previous_counter < cur_tol or later_counter < cur_tol: + if previous_after_flag: + if frame_indice - previous_counter < 0: + previous_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append(video_reader[frame_indice - previous_counter].asnumpy()) + data_processor_logger.info( + f"replace {frame_indice}-th frame with {frame_indice-previous_counter}-th frame" + ) + frame_indices[frame_indice_index] = frame_indice - previous_counter + break + except Exception as e: + previous_counter += 1 + data_processor_logger.info(f"error: {e}") + else: + if frame_indice + later_counter >= len(video_reader): + later_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append(video_reader[frame_indice + later_counter].asnumpy()) + data_processor_logger.info( + f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame" + ) + frame_indices[frame_indice_index] = frame_indice + later_counter + break + except Exception: + later_counter += 1 + previous_after_flag = not previous_after_flag + + frames = np.stack(frames, axis=0) + assert len(frames) == len(frame_indices), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}" + + ret = [] + + url_sha1 = get_filename() + for idx, frame in enumerate(frames): + tmp = Image.fromarray(frame, "RGB") + if save_to_disk: + save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png") + if not os.path.exists(os.path.dirname(save_path)): + os.makedirs(os.path.dirname(save_path)) + tmp.save(save_path) + tmp = save_path + ret.append(tmp) + + time_stamps = [frame_idx * video_meta["duration"] / 
video_meta["num_of_frame"] for frame_idx in frame_indices] + + return ret, frame_indices, time_stamps diff --git a/fastdeploy/input/mm_processor/tokenizer/__init__.py b/fastdeploy/input2/mm_processor/tokenizer/__init__.py similarity index 100% rename from fastdeploy/input/mm_processor/tokenizer/__init__.py rename to fastdeploy/input2/mm_processor/tokenizer/__init__.py diff --git a/fastdeploy/input/mm_processor/tokenizer/tokenizer_vl.py b/fastdeploy/input2/mm_processor/tokenizer/tokenizer_vl.py similarity index 100% rename from fastdeploy/input/mm_processor/tokenizer/tokenizer_vl.py rename to fastdeploy/input2/mm_processor/tokenizer/tokenizer_vl.py diff --git a/fastdeploy/input2/mm_processor/utils/Roboto-Regular.ttf b/fastdeploy/input2/mm_processor/utils/Roboto-Regular.ttf new file mode 100644 index 0000000000000000000000000000000000000000..7e3bb2f8ce7ae5b69e9f32c1481a06f16ebcfe71 GIT binary patch literal 146004 zcmb@v2VfL8(?2{ad+uON$H1LAY>MfWC3h031oSW{sOP-O*@UYa+3X zh(|PQ->#F#SC?)Obx$XXINhvM=Z4Rg-K;X~U-`jO6UuosEl$l6d1-Zm@3U3hJ z!)#QOh+V*akGR3H!_EZ9eof?goX9dgZp+e`D`8HtMx;G_fhYUa6b@s6(D@gNDY%Zd=Yu5EUFoLlX&~^;&DO`-s;9~S1orU{8E=C{YV)Q8~+5dvRg!?rv zMwh@ZM%QpLx`~U?ZTbc7U3v!h1ujM=NU1^Mad?6@pus6^3hL#o!i)ObJ$+ zg#xe4s=%$rs==+z>cDNl8pCbIn!|0y+Q99=I=~G_|75H?12m%v@l!2v-6Toq>oxGLTgpThlI zdxi1Bb$B(?I;_U0)}b{_>yYX_tb?n6Y8_PFv<|FZ z);gei0c-y%rnO%s)7rPvr`A4|O>6IprZv8TX^pFpWbIXGogS~~^zuyzbKtsR1s ztnEWgYrBxi*0#k>Ya4s0wY5FT+RAQPTiUx=Tj0KVG1J=2W?GxtMp&B!nbyWZBdm?A zrnO0;Y(^|iPFo<0~F@GXQ5e_O2E{=iUNW^xX!0uL*0QoQT9~MXLRtZX`CrT zLkmEvokNWpY1KH#v@b0f>s8kv50wCYz>nU6Zcm2JJ4NT|bGilIy<|CAE*8v6unMdO z3uB#FPu7>c!zQtXYzf=U9P9ym#!Mc_tMf>HoWIXMdnR9+<_p*&G{`Lq1p z;6|>ZRJ2~v$wfC6eN^-bI6yXUo4+l$Ex*lX3kC-&*=o8tFv_;ow!@YL4ji!^e}x0x zTpY-ijRW!E00RdYIPe@CfFPK^n>W~F^Idk^JOkOd1h@$J67V_TQ^3c7bAb2Fe5QwK zy!`X!UG9bZmM~L$ziiCvz4-3Mw=X_?ap1+S7du~Ud9nUQ0$5v+v=qMZMj3xSfBF0w z(etB#RKQNa=I5K>t_Cay%tfk3fH2@ah;F{PdE(}fo2zf~n=5awx;En4$ZJDk(+#>d z@LK!i>@kxGZeemX1;h=Y_S4#B29Dh`V@Xr!a! 
zm^dy@h?8_yoQC~z4wlGy^!yL#L;46d-6!H*Wz`8&d_Z4`59w?4-;c#7uu(41Mfyg3 zE=hbr-@>B*4w~(2`d(as&G!TSC@zZ6=(6}md@H^a-@}smLHr0?@QS!Bexf_D|1#)T zafR+d$375O=^;I$$Mi&8qu<2O;yV2 z%tP6Wa-9@Xviz(7E655-Lt3P#^pI;=8CI5+W93-|Wg|g%RZ^BCY$tD471m>QRs&jh zlk}0ka*13j{bUZgS#FVQSZmfsu9B-^U-`>ia+zGty0ES+LKc>RvWP4ytxa^VJrSn9ZN3&JrE(s_DNm~qvGdy=nl!G(KKh_Tm& z`(UoL#)bQ4!~Iw>InEWHgR04fF1%nCZBP)tvYIm-_PVz$%x7I&ruhihYE`gE<7)l z6k}X?J}N3ATzGyeCfqa^Ae+eN3NJ{-`6CxzhzfIeS%DP7UHr$~qbMwNCp{q)0KMeI zOOP-0loKyW-h6-yFGcxyxC<{$#V`+WrU|72ypRholht0VEEU2$!P<%oYx-5ON%rGBXA zztt^^8~^y!>a*{ZlQnsQ9N8*=sczTpmJgAs-{q zHnHIP-zDzvA+~H%_@>w_8r*e$Q>yq3hPfJsTNi} z)ogSaVuw&4txHC^o^@-Ka=PvNk~9C2WTU38bT;Y)tPk=UtNUVa#Pvt6w%2lqN62X8 zue8(%;EEoVwiC6W*6`c8%Fmv&s#lz*Y9!n;NYzC@Jrdzcrs-?0&VZ%D?2s||aG{u1f$xP zYq{44Ubnmpde`xu?S0HAk58men$I`B(l^w1u@A&)sxA1?@Va-9yy6VNR)=e(A@F?o~n`R41CZ*V>{f0_K-@;@zLDPS*fyuhagsbFxy=z>QJepV<) zp|XXV6&hA(R-to+zAW@}p~rzTFg$Q`;Hkm|3U?^Hw@B+Edy3pBTBGRvqQ6)RS<6{t ztaGfptT${GZR>)9gIWcx4oWZ9qS&TlSL~(i?d+@VY4&gIKNY9qfyG-DA5%QF_!q^W z2m1#X4=x)V5j;A0VeppV%fUB;pNE8oObba2`7-2QiDD&2mq;y{tK`IzmrMDS8eQs# z(q&5bDSf8&?a*4GiJ{laR4g;G%x7g@maSHHR@wKo|RTsx==Z|a@)$&EAOs+sY?DTU8>Bj@T5Q>XWOdR=-oDV2uVf`qr3MGk?wcHAmK5RrAAI9<`d*npo?@+9hiDt^IBt|2hNf z9H{f8Zo#@k>YlA9>P6Q(RPSN^()Hh}|8ZEkumufdg9#1(XgH$bca2InifOc}(Tm2> zO{mG%ranzaH9g$)&t@f?HElM$+016^npbH)qxt6MCz}7&qE3s=E#7N!v!$(Nx0XX% zPH8#6<))TDwF+$2rPZQVTU#A!b-Q(e)?Hg~Y~$HxXq%~RmbZDo&C|AiZEbDi+wN%l zW4k)-*0j6czI6M#?c2AX)&6w*uiM{h|5pd!4&6HJ=z zxL^4A@Ee`OI&bWJw@a-qGrN4zHCNY;U5|D>-}S4mPyhLf;1S*tfe~dQnnuJ#OpI6_ zu`l9S#QPDCBLgDqL`Fo$M~;eI5Y;fMPt?q)^-(*b_C=*dor(G=>g%Ws1?H1Cl zeYc6-9Nms|d$-%i-7a*y-0fDk``!MGX3^f!xuXk52S>M#9vOWu`g!;8?lIldy1(qv zzQ>v#cY2oU*}Lban4&TLVm^%Ju`Od`Vu#1hie2A}_bS}0U9Xs4bK?reb&5-kyAq!( zzES+x`04SR;*Z9kkAKm-X7A;_|LoJL&(l6n`#$Jbt>3_Y3;TW2zi9u3{df2OYe3xr zy$8HMkPR$2u;;+J1CI~9J*d>6HiO0wT0ZFE;EIE14o(~Va!Ao3ZHM$9vUGJMg4 z`fGIe(X&S%9sOubnK9kREF5!U%+s-z#?~7#QGCwPyFhw3UAGR>-)FsyglyiPu_m=PQ`cLdT04N&n7jU)OFILNv9_F zn4C5x$CL(BMo!r}<(sLwrgoZ|I`!wNFQ%287C&vyG{>}i(<@Jpo<3{(h3S{4-43-fEw zpFMxk{O1co7K~i5W5Kh9r5A#|3x_W}x=1W)w5Z>rZHo>s`e@PRMGqFs#RV3ZTijrA z_~O2cCoEpDc=O@|i{D@T!{U2O*pfU;N-U|hr0tT}C8L(iTC#S@-X&+2Tv&2z$)8Jo zm)e$ATiRl2^wObArz~B*bm!6|OFvn9b?KvJ#yZq&fTq}yNsJ5cTis%(XS4>&4Y{kwMM^}8Z;>wCgD~*)}Rt{eI z!^(TBXjSf2!K-SlYPqWWs$r|9uUfe(an*@cU#z;m>i5-Ns|&9#zq-NdPOJN@9#psvHh%5cwR6{QT${S~?AmYF-dX!%UCwpI)>U2CVqN!j!`4k*w_;u5x)bZZSa*Hh zAM3r>7hYd}eS`Jk>-(;sxPIaKt?LhO@Yv96!{`mOH>};TXTzBd7dG79@N}c!M%%_p z8=Gv5*f?FH*_&9==|HaFWGwRzCy$(xsM-o80)i`SOITgq)|uqAv;pDp9IBy3r~<^EQ- zHTTw#tu?o{*&4HTs-rII@+qG?vw|i_axIK7#_3bUTM{ggtefsvVx8K_S=MJA8MR!!((P&4P9RqfZ z*^#hg!;X|4=XPA$@$-&5J09(Lx|8qp-Wjm7(9U8zOYf|-v)0arJ6rDTv2(=E89P_) zOxk&3=a)Ob-+684FFPOZ{A-urF59lkyPEEb+%box1zn?oW4LOe~OSODvgKC$U{(*TkNQ zBNHbkE=b&zxIgiH;`fOeNtBc)sYFt(q=rc?le#4JPa2*yJ!xf9a?+Wk3rV+=o+kSx z=S?n>Trs&xa);#Z$-|PTC9g={ot%<7D8h2-1G_xJEUdH0msQ+H33J#F^H>>0Ud z)}DoXw(dE+=i@zB_B`5a?9IP7bZ`B=o%Z(KJ9O{Zy>s_&+`E78`Muxoy}Q@k7qBm6 zU#)#@_r>lTy>HIG#rwAJJF@SSeOLB9c32#R9AzC19N~_Bj){&{jsuPl96vf9q{x&4 zDP>Z^Qo>V)rz}X>l5!~J!;~LW?x(WUys0Hq>!h|zjY}Pqnvl9a^=RrhslTK?-|xS_ z`2HIETkr3=f5iTo`&aMZv;Xw|3;S>He|8|pfzSiZ4n!Rod|>i{r3ZE#IC9{V13w*j zbkK6J(7|#C8ypNj*!SRsgYyq=I=KJf`GemdynE0*l>1PLL$wdJJrsLr)S;P&wjMfk z=)*%l9=d;+AI^KY9HKg ziXE$VtmUyD$3`5Rd2G$GJ;zQTyKpT1*z@E5$BQ4YalG~MnB$|4&pN*5_}=619{=X} zo#QW01e^#tQS(Hr6FpCiJTdFU+7tUuym#W;6W34NKk@9O$I09$ttUfIRz6w#WTTU< zPIfxk?PT1^fhR|voOp8D$+;(&o?Lr!-^q7RUObt8^1;bJPx+q8bE?Ftnx`6^YH_N= zsmN1KHcW@>eHJ~?>T+?^jD`ZoxXMYuQR@93Y-Z& zQ}0a2Gd<1>KQsNz$}@>)4xM@T%r{{zSFc*RV%E$V)2B_HGI`QFZ@)Eh!uWAx$BZ5| 
za>VdqLx&6=G;l!wetrA&j*sgV8`HB#_vmg>kr7?HbPn&-p?$lyZCbZ#*`j%~rX{WZ z-d<%`ijQ|Ad!ydoWy?^Cw+~RCvSpaV(#YYV10C%`ZFq$k5!9+=? zLk2fd5F4Km=Z=U(7I+HJB8yg??5#RPMcA4o#OS9G68_4aGroq(QOCJ32X7P};bH~&(0SYZwdfY^XuHip?GqB>Q%D3Oo3OwXrV*pj+(^_U((dRLY7eqU^hQ=G zSY8bZk7)$V*Nrh-Gn8OU^QT^bxUTjz=E9;P9JZJ~kkXSZrWIkUhv28JT9DDXdc4gOC@ehADP7yF*GCo82iY z0);wkv%@1&Ipd9D@M5+&;v&*)&O}GT@SZ3xjXT5qb-I)gRS;r0Vx%)BOvfNhBFxhnHrbiFq${;2V*FLi5p1|1 zunsgk|G*_QWSF-R<{9P{=8GK|XbTmTijboW+m{&a_ho)8Fa>#{1_(}LlT*CH0zoZu zat0m6>n1Oz!X{^ifpV2BTSh3-S@F(UHRv1_vEP@Fzjl$Lq53OZrb!BK8*0xI>kbiU zz$PiIZD6MoE_~!rnX39+{FSPr|S!$i|)$ zYYB-7O^9$PAycAd_pb#(6~Qgy>`h}-3Un0ww?I%_7h8;@S4=2Sn}5>;MBCzGRcgwY z!bNZjlfkSW)dQz2z7B7D?}iQ^d&A6_`c&T;V^J|4_J$6Y&&lT|_9nIh{S)Hsy&!X8 z?IZdI_KA!|-i|PPtV7xx2Bt`Ai2hW7p%P6}sBI`}(+X18F0_3&bW2rzTS7u(TS}M= ziH(a@_l<+l(-T}#_Qs7>H+4VJ#FpR)i;asxiYAddaoI8;A)$#q))o(Sf$D-+o$PA! zL6j;bJSrl=H{Kp^2T#Mo5@J#9KwDg7U_xY^<}t_tX;ik1k)@AZ>WHge9~{>Q4mv`w z7<(^gfa;I0hxL6upbt`I53{#a#evwCifZ@?E$vO>kwQUiyhA|lf^6~F5ut>H+G|bm zkBN{)W+EHfRA-*xUt39vx;&p_g*v5+IF^?&xhs zaIS=a1Y6An=&9~du`(pCOAH1A>_BL0(@h-)nin-tv&d{GH98aupG9wiu^}~wW=$hIc6Y7cl z{{B>g=YLliL_xQv!%GvQv{8tsBJ7`w!X^mSKrK}P@VK)YEmc(vFdBV2At5dnHdXf= zs)zc9Qoa|tLds6c=HzO-CmUr8c<<<8y=ym?*k#w!sCMofAH~)Q?z{$>5qz^ zfkJc9oo1Sffv(g})`7psPs_P0T6YYUnR;$)Xe6+v3Sy9|sRCC|@^ST2-`BLhD|=^~ zdc7KH&&*5>^!Cgg)m;knh50K34U`yS^9Q?X>81-oO>m!3D}{N4xDr4j{ZY+Y2?;)S z=a6Do>i-B)7-@5oBB7mM4RE}TrcX%l^m}uR=j-YH^aHB%qm}(KeU*!=J9sy8_%u@X z3mVYjrSwoawC~&Jl(<1Ev^kM&IMhL?maDTm2gw48_h2+00mp}GWu_9k`PN5wX=iq? zhlWSIjY1Ts&Vd>JSfxJ)=?PL0s92|E&VimWG&Is_a=oo&W2){_Q$uaG{;;zeF<4s| zD1qCc4UpP1L@T)j7(D%BF*HLDDZLX}0Jc;|We>yVxBJ_$`?-#DB4l?;TqleHGB~16 zV9iL(B+|@Z3M+jD)^H5+fbayH%|92S5^MpOS~zB?t|(n`b{&RM+!EqSrD`xEG$Fy6 zPFdl;JfT%5FhR}HylVt{gQms9TzuR9H=w zvdW#3S?*Yrt0+it7o0|Y-&m{?Io3_N`d?QyjW8P9orK5`SJ?^C2~n6J2BlF^RlJKH zb)TcKQsbJSrHUXTF%NIoTz^dMu>){_+KfGKbL6iyR!*WJ#*frrKBYda5B1|GXeR%V zN{A6uSSCwSYS^n%82iWCP;R6j2}lC;0Q3Y@1*`$g0JH@R2gI`nG=rs3DVd9! 
z%XnHP%hN3E>uTjuo(3B=sEB+{yN#+e&IqI3avoq1?nlxD;~?$kW$CaSKvfL`;q7R* zMWsc!F@?q&zS4Mmgoj)RkLlsc1o^;Co}A zPMBCnl8>jFGKvPvcW9ohK;7ju)S(`A=O0jAgvH8!G@m`91?-M_Qs&V3IuA*6s%7U;HI#m2grLlJ{AN)vukiys#a=_1H45w+p=SsY@ zl{={$>eF9Vp`0v(mdSMLCL*b~r3FQcWuW;C^_D|ulBFfWlV~CK?N5`;>kEpT71oNS6LXnDp@+j?; z@6tTWyVOpm&^vMoHA7y}avd#~A5c$YE%paJqjho=tp)GxyfoR3xwHjomUwKY{vM4f z7UgX=W>F7*)Vyv?qCRpdMH`F2&&Sw{D9r=12E~c9lqeR^0!6plG(m0wG%_z)no}zaQK-?ICVDQRN}k)Xr*0b6GY*i$ z@S&|1NtHbsQ6S{8y`sxVqzZ;7wXu|?aN`o~GbYeVk6BoDI!z8s0OhjWMH+*)BEMxG zCFn3@H%bnsUf|;f5e>dypmLTV$|wCP0$*ILmz@BcsEl!+Hb{I~q{77aRMPki^*f3C zDC%b!PwOokQJ+7ksHGaxMF84Tyk$20c<`g2c^mhAE$G{p8+c}p`9kI6;YI5#WoU}* zYX0C+hSH3`(3iial90EY9=`OE#g|T4zM`eZ&$QCZpu-+tLNQ)926; z2WW|BMezFxEdk$#gQr8l=UTEfmG&r0RgCUrHICCtxq|u`!{863>G0=RDo}lS(!8wm zj-s{3OR6C6gZKMUCgcU}w_9eQ45gRq(FWBIy^}+th;zkhUx;L$~aFqR+ z%7`EuCiCEnMZBwb`y93p#~yFB;~Dyvcm?2F07}d$|qI{aNK4xQT+I<70^Lwda32fjy*fKzk8O6A-7KnFw&|u>awC34LklLRQOS+UYq4 zZF?7ZDC7g<5Bir?+9?$JD#&O~-Hmo=*L3K?�egE!tu*v~K}gWeK5h%eT<6pQ9}X zpua7m0meeK63uq{FLRa~t&-p_h{Go@P zuo`|G>e5_>QVto8ewr8Y{jj(7Z5l00KwsIYG2|yf9tZv1F_s*s79QM)pJ#-B6e@*z)tPCI#2a1@)mQbp8OooJ;0vcz7%WZ2aS(u z494OT@-2$>Xh;o)Cc~{|Ir+36f+64KU2K}59e6I(+(_5B9zu8XH*%IjSZKxlMb_ttZ@K9#G@18n@Xq^DjMqqaFs)IM^t)waro)a=VneL0@;18zDan z&}Qp^!}dXZtQ>~EJ)XMCZ>S$+iCJoq7rq5_%UA^^XWDjzEok(hUfQy;-owD(=gR>GX9wnjnNm#WT+o`3O`{j1vI zAAQ)s=(F>|uS2NETKJ#BPlcZW|4Z;QKYL_8g`WufG|wsA#bH~GXXj&dpT}g>Gf9t zx*p{&1tbDK1T6+&mcUq`$Ap74O^pw3(DW%9U#C-zEpAX_Ms`qqaYObxWWz0Lyi#>i zpvEy5=zjCSoM$O$uHlQ#O~!KbsZq%sWArhf^Ht_r9gjJl?jL9;C3}$J!%Ai?ug(3G zjHq@sFwawZL$|TA`<1N^`%K&Tu;caoMfGhpPr}?x={J;hL)rMc&%@U?zUq^hUnm(> zW$HZD+!N!2Dqq?0ne?f2NDul_U!hCc{JscRdI0n(+O&RBeO}q`iVoL&(=|6Nt@MFV zwm$j?`r!hlm-L(yw!TW^mgU!dw`?;tS66d*8|Ly#_rs=BGs&q#{)YKb`^<}DI)4YwZCJjEPJ@xuf28Z{Tu?eUr(boU{*UemTP=6(|3 zel9a_HD6OSxP3JjRl3AIhj9Ct6RCNN;=AI*-+auaz}tRm4yETgn7_O0`)1$#@Xg_G z(C4q!&;6Xce*f&7_hdaY?VsWm@6?zKe;e9*=Ii=1+db@W^WALsP9C{=<(_N*oBz7r zTm(Iexw(6e{!hMJw*KaCh7N~Z>p7>3|EkX@`E}2i)VO3A<^=_a6C&Dt0si^OlIGWL zc~bpCV0}dQ)j9LyhHo$KI|TYjQ3xT(|W&{xzVth$9S-LTAnwr$i8N0 z`NI4|$H9J6`d;f$t>0nO!@gIxex}}5a{#52UGoJ!_5j!8gtGIYztvnog}cWBgxNFm zS9Yu_12(+QSIrsJc=b7amA4v;RQ{NYXxm%Gt1y(IZE|J%E1O@<8`3bxg3YL(N4aV& zaDz_o>RXw81mm768~p-SBqmMXELmVG+qh}#a5gM%{D?tpSlzv4M@wwrSVeMoZc@=whRoj_vTlcDeA5tq_YZnoYdCQqy5h0((O& zIC};gEpUQKPV%Q*6hOHt59LLg{Pk~LvCbJ#}>hW*2 z$e!NgU(!qDt!j?dh0HSka|wZQl}5&~wsCQThq0#dgZlSnVevzT4raCbjEIe66%Y{1 zN^7^McJmDyGI}uc$GJy?n74MNa_OaZ|3u^ndNdRsWeij9t&xM{hSAlLBP&#-OK>aE z1-O;zGq_dg1GrV`UAWcgINa)VXyoW#Bgrvx^steXI7;QPP1nWAg(4a+s@(wX=F+ae zc5`Z1ood6tRpsKFcDPC&nTsd6HC|Y|7VUay*U+w~c9o=Y@*=FqK-QG+#PRJaLWGtv zNEq@A)UFR)buNnHzADR_xA2vTg0>XqqSq7tOtv8Nj^j&6Z?>E!5Wdr4E7(f5imhgA*xGC$gnq{G=c=uytP z6vQ)%cZ&1qdu$oQm-6h2`tBb&)JL7&c{8%GDY#?ki|#OvuUYcQWVwgWmAm9_oYIiQ zC(FIypgPweANZFCeYFTm%!`#94k`+%zKeU%z~gy;;(Rb4OrCrwA4gs|dt}n?uG$(jbR^$fGy%n1ru!2djKRKX-CUQ|tX|E__rrDo2CPv7oen7B!kS(1Sei{j=gX zD4UG-oXh88+}#DLa5j&tmGI>>PVJh_=WulflfXA>t@!k8u|@HvaZeu0V|Xtf$EWbA zd>Wt5XYiSP7E<|A2`WjURGF$$Q+ykR^>BRW7EYbBU0QW z6Enj&dV$3#X4sA5Mz9fLlt7GyU*zBL+dN$i5F;~FA{=>xGr}&4XO(WLMAa;0{BC3z zmyBJ;3S*uTWrQK8Jn|0YY@3`a`^he{kn|My#95IlCW|p*kch&m4MlO1gExPGvd-dk ziGKKUs}3*5{n=0K6PC(mvoUNC&ZUZAZCP=8N?+j2t+()97S19wCWxEZihH8tyJR5$+yXm9MA7hE7W6zr#(Gz2GMCGjMldEnB5T z-SGC$jWx@}nrC87HKyeO>!m0|G|(a0d9er~iC8T|?&3GNyW~8$d-&&YcSADrz^3pN z_jSzADn>liG0t3n0-h+Zs&KS;8JuiqL%+o{@)W|7MFkx{8Ez8)8163EmMZs8;O+)M zQt|dfS1-8B!&?p&7NWw0O{c}V4d@mAI4$7>?vtgG?>z#&5-AGl6soON-`M>c zCC5~Hyz^C*_JfB>lNdFuChY^-6yTDC^rK;#x z9M~m0!rjA9!gX`n(H;VaYrVZlbIMHwo*=D&Jag_h7v% zE8puXM1<*(?6R{EA&GK6+$8Y??k>DBP|qHKn^}9DN}*z|sTlD<$2fU-8F-?+qQY@* 
zR27VjC8?ORmGRaOw2AUMg&F~pxC8DktS+hiHJ9X%ke0;Ex+_h&N0icacejYD{~i&n zTf_={H#Cu3ijLzx8LPo6MW9ZhMt0q+@GebZuhVo?J%M+;it4^_lXw!`U9vaaJvb%=r#j4{)5=$3R8QI?xQdZV zu3wjU)uzc@_2xvpk5`oLfV)e^!QI1m!gbbD$^ISyDB$ST$W3wi_1u&sBx7!-a_a;) ziEn|s3$t96+g7;lTD+%1@TSQvf2xl6O5A3 zChnMi6i;=KpR$mZUFN>f!*m)of~tH3=tfN_nV3Emkm1e(3fsVKi%at7*dJiv6rNwO zl5khtB`)Kk4^%o;mP*@}KS%ss9qKBd{YZEDJl=-4=B;r4VM7eCCVR8U98=UKpo0 z7UfoM!}*QHaC&2L9?Wy#Ogw*{iwE%ByfZJsOY%~@w2>R9I+n#3*X4OdUYRcztwkHr zR_Wm?ox+8DgfGC1#5` zI7x0UPEwnXv(y%fMMe>^80X3@!?|k9#R{xzCNF->|;$2mMKZ!H#-H&*=rd#JP14WyW!`s6I0ar$tp@RdDiC4OWZQVGUVh z)|9nmZCDgviBq+*o#%zG>yPsj{3JicPxCYUUH%^Iw{uzNdu5&N#lPTR@e8o;zT=np zkNhWomH*6dV0M_!@9+$M54PVU{)GR||HPc}InMsVK~};L9ylA&2j_m}#F<}taMD)+ zQAiYqwP?favbYF=!diP&g~NK z;@r@);+!}y-WNZKE8?oSCVm#z#SL*&+!D9lr)P-_@vFEm9*W1}H|Z^XapqNFX_uvN z9^q=aMsAQ>U?<}=Q{AIIp%b6dQ+!c0kKJW=*e@)d-DbDgO?HD_XFva+`ZVVPrx&14 z^To~{t$H)JID0Yru(S7O?~QpfdofO4{fFNBrXE`XeYO&MZB<^4*WlGfOVR58+<*Uv z-Yeeuf9}8Q-_V0~pH^05+4?XqsC%)Y^q}tPO8@yn1ERk} z3+8&Ye`ogYtiJseW0p6@s_g4=fv`l1U~CJ~bI(#3t3oldl*I^D9wSploK{g8V_j8@ zb=9c`&ibf@F|Upu^VG>6Q!t`Ur+KuH7UNtyoK=Ujkq_Y1<3n_q(g2u}i zTl$W^Cw-C}T_&6xM_1_@{Y=;C2Hm7vbeq!Y7o6=@8E3gw$ElRHaV}*e)`Ydd`M<3( zql#oH>@YjdPT_>z6*x6-P1f0dd=uZyxA3ie8{f`%@SS`Y-<|!ezpQiq_?P@^%oM-n z-}4`Es^=AcjbGm~Z-{rsJ1l5Q9G5?MK!T;jV_zV6LC;M?Bg++J@Z=CCw184f> z#z}tpML`iLiU_L+!pt%l=lPYwd46R@c~MbR7F9)cQB%|wbwzyyG5e&Tt9WHpE}bIv&{YKG(T}jpX2udP8M~Y%ZC$2KNnx%j7^-zCobR= zzE@7*!|8kX!~^k2Jds|~2WQ?DlO<(2oOZWfZpIwBxbF8@_l2I`&S?%8qc~;b6~fcWRY4D~^*9Hw-7Y*BjV-#1N{c-3r<*sa+eAs&x~U ztMZ*%NadjJun?_7j4t^qJP-a6Cp#-!6gmpp-WzTWUDjEOfsb`Sb2-wJIUCSLVXywF zaQ&y&$SmZ8Ia&bDz)oe;*$g(5&BEM#4ohGQ*h2BU_ycFxaip-|Vo2))*B_U%^KyZ% z09+i>oJvxi`G@s2BHT2WE+SGBg18>fS(0<)X%fhWoa#HuEn4wwO$ z1wg$)4+*TsqkKZ?2%y}i4JCn>`CBVqdPS?8a~<+|i|m-)=s$gmx)XKY|EfImqBF=` zW8O4>apDNQ;|jfF{$~D~?MbyB`L{pwhC9f-?#8nDYHb+5JfIx&H{~iH*Aw$Tu!n%p z%-QA^9exaV*mtM@e9g1wZroos-*<)l2hV(Hp7;;Ze~U78?UDO^HwOIHE6LRAz5X*F zC@(9WGxW`_`Ph8)=9vFJ6m=Q*_tZ`*UJZA*jJv(uvF>p9{XhBU-PfOdHKpk~?e*2r z|3B{B<(l`v*^Jk7%;MqSM%@(94Ws&}L*7FpryOb*Mk2 z_#r}mMYzM&cOVd^z&JP8Uqhqdg`?Z*R5NxygByu zP}P%FYrxHtyZ}YN^AB|V%b$4*J^gaz{pr_x>ML=N z%?sv1^DFe08|GKe81q2(mNl&^)chg)Q<>=w{yp?xL(qc%%b)q7J9YM!Hq+gq*?sin zF;3diqxGM89&gDr|6U$PO|p$QW(^gk`UP~un9O7<>~8kE{|!fb=rZ8n&C18!`?B(K z^IP@5fBO4>)5(9YhjTo2_v?SRKirb=y6u3R{|DP4tAAu+|5W2QhI6cfC_7SFX3CCK z>x3M0s47^WtAVvaf2^l8!<*F>xC&u4r8QOvJ79&dFjfe=z|M=p6$Il6oV|G*?Uz$`D6=^GGNtJ0EW=d6QJ7!DOX$NLZHEAbi zO|@wkW=>6LH|F=9u=}Zp=tak|+BSgBV3lnsPBot_rsGue*&=~H!dlr<`V=#udpObj z0oL_D$4uxEeIcH}7XA{p_)EGdYhv~J5`L8+jtSXYj=)c@Opp^;fP7oN&G4n2oW%0r z_Zk+ly!fqxLo86H$uwq@N98dVBu~f_%r4K!_gHaxPM%{WSHW+rN$kyp!)W+0eYSj zz<$7Q^5n#N@UK(=>%#Z&CER^>p8~N`{1Ce!l$007%JE~In*AI54a=8G;ti}Zn*@1> z#9OiM+zPmocppf74E7}8eL7q<56cT#--K!VX1*CANgTTdF#q3=y$T2LMkohX1&?9n z`#AaoM}Ihn-3#Z@D?G3Q_!(B*Kj&ZI9sQU5OXTns{|YI;=6IWe6~PO@FY=4Pzv15i z|CWD?oWE0Vx3HS*=ddxPJAdyC(K zo6ghW-a&8VScABW^cg$@_^B}GZ#s&D7keLJ`A+qre$_QdLD1+wTK z?um8IO2Ab=_r!{472vA3dt&Xg8gSL;J+TT}1Gwt>o>&j91zh$2Tr8^`ymJ)EylrBdc?pA{9=4fs7dK|(>fmnNAkEfJ=$))v6 zF0Eg3LBBi!Rlng~wXdG{`{{YVpLi~wqZM9=7g!H?3GE}HeQJ`BweUVu%G!9tBxD`D z=ajOptP8vzc2`PSU)Bd6Cc}W^_)p*sWkcYNWFz2>Wn`WmDkIWHaE+@wK0n zEo2MeEwQ6q%2u)!@Yb?5@HVmy@V40HEAhKD?SQwJ*nuHCsyDVWT)o4^9)~V)BV+{J zDD3y;GFnE%?ScKjBhNN93qDRAByj{7=8_A81Uh8IPej21n`mAWx(VpISToY#_j_q$Ee*3axC^E zFgZ@`U%;>Ej0X-)jr0>Gc7MpX)LsUqwegz-@8Au-Md@zfljUUKQzhP*$?0-B+?jGF z+}UzA+yt2bcdnd^-2n6CJnXxeFXsbaAQu2%C>H`(d)0mQnoVB0U+%~L$^#Oo>&t`k zAUJkd;@vmKgQK9~m^=o0PGMhxMV`f1PF$W>Z@T3d@(VommHZ0s*Yazm{7!yH{_=bI zJ-$G~sDN*}f0RFh+RO4X6_h{8pHSizymu}ruga^yui@QvE{qx1u`}xi_DST2X3cRjfWdi9pU4ygiLLya8% 
zdgSo<+sIK=j~ss4Mh<~DKus||HNzEvF{B07-<0LzsYek{J&IWLD3VuOF}d}~;iE^6 zVtVB8ag7|xqRFc*8k-&=g7nznt49T2t?LVE{q6;e<|#D(GuSmnTv}e)HC|fFduzKU zhqh(>wSMPXzvsZJ?{?_o9eBI#)bbu$%lm09FQDa*<98lTK-U{u*L!MR@6_@>TFY~- z<%QPrTx)ruwLI5aUT7_!Lu>f}t>trQEgzt@d=9PU1GJXUp|yMftg36EM`?KjE6dk` zD_w73jrk^UrST1{Hs1!W^uB?0=U;#;?Qg&qdyHB<;ZHDD{RX>?X}gSRyNqeOjA^?J zKgt2U@1gCo0@^OisqM0y+Ahnf?XsM(%dmG#j}4w!x6T1vjS!w#!S)BPewoHoj}R6; zHdyqiV9|D5Zf(cq)^=QOt?T`@uJ_lv-e2o_53S`rw0`%{n%zU|bPuh~J+kR>Kdr}o zv>x~Suk<+H>p+LL7OgS1DQ)hp^|+VT-QGgkfL^cxCu7(56j*}!vuav{wb1&SYke)WzUEqA3$3rY*4IMoYp(UR(E6HdeJ!-U=2~A1*#SEooLbwEov`yk zYTa$f&e#hfwFWn2SJ@S~(&L82kIMm9+T4)cWH;bSryH_6_L@tr*$w>mTujV25YuQ@EdTI^JwT2b)kURu>V0R-{X;_Q0yP+}9;Ct6V{DRuM*oXSQd>^qN z$Pb`HKa?L*0cC^3g$=I0@xbvTlT8=e}`3?4x>*YzvCBUkGzu&QMh+=cU7CTjZfGg}j{E9!nDS_31g`)z1_jFBxdLblT*WG9S@5f~G@VJz&4(GTx% z)VPOHPrZeDgq<2wN7<2<; z&n>kx7GupF{v_M`Oy_Qefb73c=GwL3e4DA>VAjighxw+r?O8ixobS_zX6|uu?rvEM zeXQS2LjUTwZjg)@(6F_kP3uE*HiDLH294MX+OHinT}NoRaA>wJ&}b3RWKqyy(a>By zps`|LiS~g7+7FiJ09c%ZU}+A4g*gnCq~O*X)OOIizOr&?E0*-u?kJ z#>dbUUqCB-4V~~Uw7_N9U{_#!U4z9X*nRUi_Q1R+)?!tO0mGg&xQE$qSOHb{4{`M9 zujWmp!+s|7F24W$Mf5dqily+^nh(V~;Onsps8Zc?<@OM%9wF5|q3}Z21Xi@WnOA5Hxck8T6gH&=>yJKwD6FS; zHy^X<<|8%(FbgmnFvonvKQ|xqFU-fH9^f~??`VfV08h49HUkf1I_|aXY3>ak08(f;6@&5fcZr;te7_kv;edMbOywNQ_JA5Hq-I8+aDbB z$J=gyyzTbK+iri9(ovgq)CTr8;0_=I za1ZbR@W@OTzhhnF55QB{1s%k>h27c+nQn z4$vOZ0nic93D6JFA20v_85NLG0T~4qmqEp4P;nVlTm}^vLB$PFaYJ51+3Gu%9H`w! z@TZ%(4O%4}9D4}vKLmds>$W(9yIZ(Rr=956`OvTPQ80F&lzaIn=F`~qYyJf3y$k8R z3+es;*n97QD6ahvbk5A|E_JC3f&v1{Djk+ymkt6-u?q+&U;)9kw}`z)jlH+Dt0@{g zdSgk9i5fLAxkil|6HB}j6H_#ava|1V&h8=_bFcUJ{(KKTJF~Mhr+m-1f4}EUGqATA z^xX^$ZU!80bNw(+z#y&^WjU^`L0N}uy9~X-A6&p6T)-b(z#m+Anc*m(kFwU#n_q{r z9%Tc{MwCq`n^E=}u3^>fHGV(J0hA_`gD8hk{t2vkALTI05tO4SCr~~{`2^)8$|;o7 z*a_rQlrt!2QOi|O^h}47UOV?$1wrNL>!ZFOvX`*V+xL` zIHuv4j$;OnnK)+Qn2lo&j@@v~#W4@Zd>p&u*aOD`9E%Li;0n#)3eDgO&EN{n;0n#) z3eDgO&EVC|;ML9G)y?45&EVC|;ML9G)y?1-&EOc#;26!|+s)t_&EiXjyJ92Cbd;GW zvr*=v%tu)WZn6kv3CbR{k2wdIX$HS<7LVZf<2YhoVZ}H@Zry?0x8y*`-hyNC%HxJ4wkWiGh;TQd2ZehdhsMSk31q8k8Rsid|Ul&eSh=aj^?eHqseXWX`2bq_PY5T z9O$V(6V%5)b4HRdp3VF9;Jy=r6JvrYe+)QmV#b0 zXbd`hrkn3GBpJ^PM^PX0V9XI~%<4tdhYW+Ze=L@cD7!IkjyOC0L#!P!c;qF}@1|e; zke{*v87%JstLzOsaDAEK0CHN8(Sq|Gn5*~b8T~`N{2rqyj?O*4-vTBW$3efdYxH{C zzyE{>ZSOOlKMR$=-tjlB+<4BNt(kSt`|ZW9{TKS6kx6PDuWZfxeIHr=A3w2kVcjAN z{HeUx|CaT72)Xaakn_%LMe!C^JKRNv^snL{;3J48;opDG)^tMt@or$y9{iJI{lOt{ zocHn120c55zS4JKi`;4alf6)0Cyh2bU=(uB7WwQ7c%zM(o?C=1h*iCg-NY2g=6wSm z#BBoBD`7qP+t4F-3A=E8udo-__X+zDFFPO{z1%r!%`xyUJ%w>6gM8=xxQD{X2pt83ixq>51%rqcaN!oBEOh)bHhxPsav5;rSNQb)0EW<&6tTkjbPsc5{204E@2UN)QLgeU4cc^lk+6Y-I(Kk2#`F?(G@Nqvh^(=1IJErXUQx; zkLG%B16J&4lihs)7{IWcj)*%2nLP_SwFP$o*?5N`y*~h-ISy;?l;Jw}Dfs3YaJRFN z?zT)jH*4hwp!pw0{?QS`OWcQs8t#N)4)?L43QujsQ^WAoC_GgLE#NTd_&D^`k5Lbg zRzAW#bj{lVA0 zIqdhqAxE1-OWy<2=SjGTOh7TCbqiA6P$5pm(3(+DVjC&{IxBdjM9T zh0AE+GFrHd7A~WO%jnyEwDJ>Lxq?=HLMuO^mCI=5GFrI|uM@?zc4*@Y+W6Kqr?=p6^!X6;_z~JUfiZr9drqR9g64DDa20Ky#r%GW`TZL0ToDezoAN#^vm=HJXzyFJ zcNLJofHslwfT!+(&r-~zGx;&bfLX-#xeM@~cS;J^)H+GJK9-D3yHz8rh5A_G9G7(Nk#3z=LP+vEl0U zN4Uw-6#P-h`laIIBB$4#ee6qu$vOqz%v6jCL=FQvCojn`m@J8TSr^cRy}Os}fx@AQvm&l>I< zvrr{iGx)|V92^06^Pk}p!#n14dPHXB`&5TE{z-K>IvVETIx@lW`8vuWdWYc^b_c^K zc9qT}FpFLN+Yf3X{q2wR`5R2H1pfT>?@31KUw@-l3=_az{)1*~xb!#A-D6MRZvC~@ z`ZQkwzrr6lz;pfpQ&Z+YZJVw}fp^S=&w#xn)~!G?(6uSgvjz?08Ld2H@fe$VetS=M z0UQZ4hQe$dhU2C=Hs1%^7Pc<_J=F$g@VPKHKlP%==VhNi|5?~h8z>i|s{jnU4St3u zaQb%0`ag~D%#t#^ggYivXhB}W-eM~OuwKAiAH|IS%OCQ@skLXM056IKPjbTH&BPz& zLFSxhXT18jcA3Cz|#wc-qvdt(h3Y<{{DW3$Poe5$iX z?0sHFyU`AO>gH#@{J(kDSQWGo{5?&36nFm5f42YUC;#R_M)xLp^S{&4-&ESVGVRIl 
zG_(EhN8Tzg|HADJcb??)8*4L31UpJ~)Tdy&$~+#_g8F@lKU6;6?d7t!dA>ayN8E@z3C1eg zP^_bkXkAI0hL!O>NpGy8Eg_|(>?s@F&A2myKv$fe7CsfuJhR@^xZ3ox@U?ISYfb4& z^rzRM-!nya4I&X`q8w{bZLkVeDcXtlqJ!utI*HDrN^}w3u%A)~(L?kUy+l7zEe41| zSf?5yhKgZgxTq1AiYvucSh2nyD_1vR?W!P?VRbWIlero2kV7}3S#Tod9I%#X{_G4*bbh)E0Rgln=+QXACzk#r%7S5Yj)1=mS1gDH zV#&4yS>D8zxZ=DWX~#KXR@|^>v^#N!-=RHekLw=91D~G66E%1dFPwW5Z+!X?A6y9_ z$O6X~Vlyys#=d78%kB$SNl!i6r4Og&M`@DMJnAqylwT zlWIIQj0{67!^v=5sVDWg(m)ze4_zB;%`*B#mhEl9^7=%U?QOxby)Bqm!411{C}FeN z@yJoejvUzi3j27taCX?2Lyb>=y#K@oJ97l!*FZjy?t_b6Ti51bGL)*V7VCOJk=hyhWB3wbX*j3>wd=b}#Yn%$$ik*G1cfxge7`_$0 zMb`3n!gn~oA>6?An*wt4u%E&$_-k$p$i~LL3g6?`JHj1gmmz-z=l6trCSQ!mGRH(r zICe-8@ef%>GEv640_)}QODIGIu3O+e1_G$S8lN^IRy2beY>}s=6qUHgUc{!$(tjGt2sBrEgy5QVhbjMSm6?}T)Z547*j2G_r#V)0IE2xOw96&*Ss9!Cr z;r-|$cENd95$jGwe-S&kfwls0Js3OQD?wo)Xg5?0#d(+*hVyVS9OoKQgYyV60$Hb# zVkCU|(PA`ej)Av>2Q@B*M`^jZ9KWs*SK!xG;wt>QMcjh1(ACY>pidd+4jQ$=9^!T~ z%q(^xu;*;B6M+MEj&_zgbfQjyW54LBaP-YFg0at8df!E_+P3I1n=PfB}sjOy$e)wwXLdF3I@Rd z44`-{XBh0lFqmgp>%_2D#jw_uVJ*+FR$y2wGOV>@SnJ5J){bGVBlx=;>ZG_UXSge8 zxGON+6&b!dFnqOV*l7>$9|Z3V#ZfDUqddb=D~6*y!%-`SqefhGhICkod_IbW){qiw zvDf%|aXoyW8^jH`z6ozzkTL9Zh2#)81;b7^hMn#VFWZB^Ia3Rpo26Q=ny#9x8mk(j zN>$l7ZFHRLkZM2LzQ`^{*-hEbcBOrheUWV*{j@K#skFXnUu1pII>G9M)oYfIEZ18u zwfN2AM~mYY-4rnjANfsre*p2C!bdh&RwlC%FUYQ;XA_a1RB3Ts;UiqMT&iqm@gw!3 z3R~@q=sB%dSZ=x=E?i{a@vCi~xE?DkjNg|d7t4a5sBFjg<-3a$6+Wo%N4~q|Qn3=> zQ9FJO6f4a&itG8lwD8^OKi1-Mnx(1mL9LbM!cV0A;eD(R0@JpF(-LkLU}D21oAcBl zMXA*R! zzUd3_O<#a-`U3pW7vP7!06+8vL>y=g{729X^|{eri=Y*E!*3)4Jxxa`#P>2B8{oIS ziDwxk#c;mKpBowhieU3b@mM+7&lOELr!9MU}%?ApR|F5wGlg@c%UFE zg}J`gtj&dWe&<*lzE7;(wL4CxnOpP>p0@*j}n0MK>QYr z^9aK|;Tmw^8)Tb(3v9Z9<4weTZsT|l$A|Fs|BmtqF&=`fGuYAKLOa2Qc0&8M7M(Fp z7Zi7V?||ZgqBdLrN7@OFbOD;OH8f;vXvWsyPCI4jkIcq!PloiZb=eT?`%MtmP5y^oPJV+76Uc{6%_AH8lyukWMB_tC@q=-qwv zj`rw2dUPK>x{n?;qeu7Aqh|EtK6=oM9yFtMx;vj7bZZTIoQX0EWe&<*lzGT6hC~9D zJph%_EN04w)X<f>g|k4!Cv?dK{8qfl(*X&mTEgjLVqM`w+Bjq~ZtQ36vL6E=kAxm;7GP<^#|s9ru&G zUcd)*`T%tL0Cf5Qbou~!`#*r=P~4_m=RVGVM)?(m?m6@T##4heA2n_c)>-B9CI` zV7@r-j1rBq3hi2O=P}C{fKjn18Ne3g-HEoyPhN`S7Vt|C-0>avC3J@iI9`Hp>N3ue z*~ndkSN|T4zu-v`MFUwG3#v@RF$2c}91C&16vt|mdK6@-37;W{{R*OMG&|BB#Si+HTrb73 z8l{%CdJC=IHMM#Jt=_`;Xv??I@~vlDzJW2{1lHe1wCW+UU@2zA8O}m7eh;bm4W!}c zkcK~?=FNcj=QO4a53Dr`auqq`8a?%-8`C7tk}N#^H#Gpys0MfxbafPTbrf`U6n$+5 zjvWPU9R+P26=jA`(9dQ-s2LDy79;T1&*f+<4mPbFAaxr$v=WfIjUDftVB0F;eNw`@ zwMLI_p-0&54p@E<-lku0|3f_aCnDW+Y$EhDYQegJLu)uCpm`hP{T`kjB_MhmJ--F& zyMx}}0tMay3~s`&qXaj%gYQNOc-#g&Zo_M%1WaxNCf~zfqXd_?gWYKjkBJ@T`Zhcy zN{r$*Ab%Sk2_-xccJM&h!P}sOMP@C+LWY;X8Y8;}s=WgoxQ!9s0_EPpjNC@gFChbg z9-E=J-9v=wCrEZZW`savQ$U~VfLRKLYz+nG#o)UZr5m)md}y!(4evvD`Ubj_1J{vj z56V9eUiBp?^#YEU0h_BhM^-*y^)q1c8!+oPz~X0M)^C8r&%mq)z^dOc-d`}@Uw~Z? 
zFy3E)(+@CPzhaC(1G9d^7=ObU?*qGj19ts}S^E{U_A6$MQq<2F%Y8`eOBe?-1Thm| z0oq@opI-p77tmL#aehxKA)f{_$$Wun(D9gOd-VA__XXzdOPpW8In}DJ;{03CBs5yg zuM?BRe`4fR`dVPVm}d}2H(YCvqX&u?iZ@CuuBG6Zfn%28Tg>xy%=5SC&2{wTTgu)jF*8!hzxJ-u86q_$&rmlfgev464ET*#MN6gss zWy@zwwwUD##cs1qaRW@LT+yJ%5tvOXUyPW3wpGTY;kR`BX2g1m@pt|rZ%T1b8Sbe> zeS;y7j2I6O3v%ljmP0O><^KZ~on-KOKCHg^ zTUc3QT<(x49l^QHvN+ao7w?*2`OZPg8)68$!9r^ zN)+?hLvS93qCqi^`!JK&g{|E6JmSq4P%eXh+HzVWMx6W`j5znVqokbKh!OXJ5hslp zAyMhy79*&2aFS%;dRq?u?5Im2d&_X&GqT}tIr|mq0eHpzRtViPoNR?qTdq$v8Or%> z4etPJ?gML10&A!xa3b8_yu)R16nc!?nILH7c2DFLh2rz^8Gy&&HC{)IzD8bOEL-2bj zj$t^45cY;bCKX!bWzkTU@bi-Tpw!E)kY zIdQO@@B`}Rpc`o&+SB^_jc}Hp- zd>y<|oN!KvFL6LvN3hh7Z6C!Sk8;(I<>%{n#PNmfL$)(2>QRAgCo8j~qGTnH7Rp8& z3oOwspyNL%ouH(opn#+#)HhFb;yZynWazQYZtM<^*G6d0grg#r;Oh|S%|3~X zXFXZxSuf}2O6y3;EA(^Fe0XTbftJ2#Gh$z@12#dnRVq0vOH1qmrQ;lF&m3YyG_-^} zXhOp(FnDhi;vI<%og%#vf^NS=x{0WChs8Lod<-=>_c3fw6ATK3Sqt>@=L4!E456Z< zd_9A2A{V9YAS&YGECTF(Mdy&nNO_2@j__Vy&hGAlvkc9i(S$f?9AjhIiKA;QJ;%7{ zJfM&WKRuJT{&|FDUF&u z^ z;YXPv`v`W2FV-()ak(5In-iWZi%;(w?yBr4lj*{3DP+Tshua?y*Erx`XjrjvFfJ}C zWAFX^qUmU8?;|5ITsl1R0{1u(H5r;cyQFMZcGj%Y(mB}+e6ypXGJSlrq9U_=`M4)P z&CX#zW#Ol2{B%YiJB0sO%*`*JTU0c+IKPk{Hs%KxX*5N_`60y`O>s*>_ab_=$avV? zJs4MuLb`|0dxZSQi&8}$xG_i975^$b&ZTmlw9dYcoShGmyN8AbI5@;R=@jk)1|J>} z9ugiN;^0bWH5y}#U?XG`2qH7jEx>`fRLGQ=QwoeEI;Xh6{)0mNC`8EG%gGrfJ>v?x z1a=SS-@B$iojW#dz-uFhuj?7!FJAg4JJl&^YI)C1FXg`B*CWx=$};!s5wCoq>#;!S zvM!)cUVKGVzsR8SA#p7y_}BU_o8L{hp{Un_;>`Mzz^@|9ySZeI&B|Z=@q|G;S0!1t zvn(tec5r;j!ijX8Lqs1=V)SS6o&efAfoTNIIYmmUX++MAkd}~vQ*qyH$)8^-9}bRU z@t)j?3jwKPx*Z*(qFt3dj|q1K+A93~cz$-}>VIUZ<0n)lB~?v`S7-fWbtRu^b(vHU zs~zfzK^t#NpB+{nmd@RIHMBv!gIL0BMEz>iA8o4NNzUp|Q91LxT%k_l9f3YP$R*mv z#nF+EHa0N6Iw^JNxJY9I1y)~68>QcOs2hT|z@>u*i01B=z>(@5(nDzz>hHi;3cY3T z!N1_FwX!7&QB>NWxD zA1BWJSJsoh($@Ml1EilzNk<7v9gnwuEJc36KE@U7_bTwT9Iu!=p|L+f*&*e)pms5$ z#hou`5^Udnmu!1{MeIb!>>*u6L>;*pfT_rFA!kCvs(>qW#F2=#mlO}}80dOs#emc4 zIe`NzqE^#q{P6x3CBGl7VZP7~$d0zbE2Vb){?)5-#}aVpv4~Gvb5^?rOEBjEaMN*Z zSn`tin3#B4Dnn9ILV~qgTCbB~h5(L-dM$S$w8e!aqhesO;Go~Ki7%J!{PPQW5Jo!+ zbYTs;aK#=U;Qs+~FGrb9ZsY}&2Vl~a;A6o8rYs4J%To{>B7y~9AE3X3v!_7uU$oYa z-!`o0qLBsuDdV>d?X|d}hyOv>z_dP5snr>sT!T~lM5b3~_y~E|c0XL1=3lzcMzudrcnPm*p`uB^-Sj;P1(h1^@1j7zcu(3NcVV*edtL-`Xr z1nA2mJM)*Cqy@Z#?7*XJ@$B$9xo-aPp#iyrVx)|D(BV3 zW{=6to!w{BrxPl6jM2`R{!*gUkK)ZhtiFDP);nNjslV2xeFsl3Ac2R6qb4MztF6w~ z))J6KPr^eepP}LlG6dsc?DTov@%2?jGErij0m^1f%$MFx=FF^^njF*6yZgAbriRnA zXPy|L8NPko#*GX14GqYc@{d6S-)zioN}JNF*QB((1!aOJWn@-QU0h;AUj4q>(*1Lz zr_^S2sZSj-Bc*!dh?uCFRTX)<va_Tb+zI)@gzZ>Qp;)zFEI3u*;-^s?8%4 zVs-C~>APW4zST~f>@nR_t1~U$Q7d6BN z_nw%SF}815IK`b==$VIX7xrmrueDVMw)61`6gW{AXrctFa8PQ{s2Jv}m=JI63MFw+ z0+sTNSU~O(j!~V2WPAH%Cq`?63#y{EgEKmHOdXV-KdYC#Qkg$$aY5b2>ae&`|EM3k zb97wN8e%WowPE0<6V(?gR#ZoX^_!TJGGSnJ?u0DTu&FAfXx>}>Yxc~|tJr#?y3bnD z<^Cet=kAyzKR{DXDVN|4DWXnbRKoP46cxzbJ5u!1zy2ZLzvTzn&LNKu$l8JHh}=kw z)(*I84?komr{%o0_O^+Mor6>=F)~P|^APEIs1$sCNtn~-Pinga^%xwZtzuJ|mQ>eEBlb-hy}qP&V^vt3Zd?77o%InOg=>G` z>$I-_>LXR(3|c!RN>eg9CuL$~Y*??__~I=yd-PoRPQM}h7IYu{kMs3uBme#7n-zHf z$Sm;qTI>qs$2lTErQjS54~0><-W3w1C$6h;<(e=GGi`4}sWTh<0Z$aM_a)-LE?OHR zD3B>?A;SyAAVNYIUnwRk64{w3BFV@6?v}GcxW0R%5YBIw?i?qRMtv#WRE{Rl7_g@; z-VJgYIa<`pQ0|p090lHIlt2`1I*>vli3;guDk5~(#|ph$w(}=n#>aS~IwH*;bBVN9jh1H79^}G1Fb?z8 z5qmmf#t7-u$=_P%;9%>*)H2F29W+$<`X=$wPk6TiUyZDrsa|cZ6yEC~@y<&Q&JPV6 zm>D-jOPct)gQKSIF3Ztrqy0XJiHwSgiHeLNYs)89r-T$&q-P9{^6!&5a%=CBx5o5e zH8VleC`~XMHRxfT6fZv~TZPPqA)H$4=qQtA6*=iD1`oC_E%oq7=o$*aO*uq}1|MMY zj8_0{pzudKX#}|mx1yNf#AH#q2~Epr67bPcj!{vaDF1MjnQwJ9J)IFq5oV~U54O{$$7)61hw z8(p3sL4Gf3T3Z@cGQEKJ*I&pf33E)YU)FQQqYq|h<}W#=U%K+~NAt4$JL!S~i#OdM 
zK5xH9e13SVr02$)(se`Th&;7N(%{VOqRBl2l1FbDG<4U(N=!JYXPt zf^$*qvE{mIokfMh%GMTvYa1KhLf|10EQ}n8%E{PRK-CCSuryeDYM8rJNjffhe#8e% zC9g;x!c;!!o0hY(kVl_0J1=C~qXNGD#*Gxewt?l%MQd*0b!f}2s~@jc6F)ztjUBJ+ z(7wHrwM8e6wuZKhhMl+Cn_4=Q(RoAZ@MioSs3v$vKlvfr+hpAFJEg@u=}|m6BXLT( zbn_Uw+OTtyw&c~i?owVI$tsza9yhsCy2q38U575sA9s=8wk~F9PC-@3qaS5Eg9a|G z?*B@Ot-i|!?U;hXvD$SpmR0M^<{qk`e4g$nbP@ES2BlfKsq6(^Cr)Mw_UL$>mywvrd4NVyRX03F9WG@EVl)Uq+wCUJ} z8TU5zoATkD%#8zg>o@kLw^+W{X~ZOo-MG;{VhuHf-1+bD|oFe$j>vybeeru=+z<>&NI|&+%5)*1$!E zifnYi4nP~Dpmnk`?3m!IAI<-yH+dKIBR6j37o$ePO~kBS#jFKTSOxg|%ZxL}CN3QF z2HDDLqjLv4i>VAu75&TXMLS>wSKE(0_v)-~UhS8^^76}Xemm9im~H-|hN|V|A@L*D zl-4w6J4rkEf9adM49Y3pO~|VwUnSh`(ov_T^_bqH+x+8`M!j@mLAGz03&~theU*SG z-lk@TCwhoo90h8g@j3=f=3_C^9L7pvNohbVZ~4LVm^kpLkVGM8_A6nu`+=zAS$#Hp z;XWb^BTT>=f6O$ExR352_9^ajqB@Jcjg7)mN8LKWo)GFJ0s@&83A45<*c=^uZuacY z#vV(b&^>QLCfOz`9{;)IL`}&*7nh8UNttFEjf}NU`AG+@yEy}f;iTqRfjso%L&Kj!uJM`hycN8xuX6&T)nx@Rrfzi18tx zUTdkAcW@ANa>{Th6`&3n@>Fy^g|&`Ct14yIrDRq`YrmQ`?W+yFd#(R!+N`hE6(5PL zURK)wm718Cp|A8WTUr$%$V*<6TK1Kf?<3-CC9e@-e|h6Y|6wfw#YxobONo3Z@g()cuM@s)V#=E_K zbL`lgeEf#&#-iMrg_H&BPEK2Rq6RZH!eAwnWAp;#MF_PdJF2J}5+GOsRXOM)fB`kZ ztMDF9@`8{MmJ{7{X*v1#QvkWRmI7pHS-)3?03b{IQGmQDeLbFd^IyL(SWI|%3UFx$ zx$3KRZ0~L<*w|QGa~<5;S?I6~fua|>O&N|+HS}f%FGQK*X&L7YicoDM-@SHYT;Zyr zQBpildJmbOnfh|Q^vyA$SlW1!)W~)Y{&3OY*GF|wZOECN+w-N2M?dn*iGCdPQocXDdxjC~ES-5P&1b5UK*>=bqJwEmO#jET*jeP~qGhnof-jjNiL zS+y{SG-!Hg<2*a|sGixY*OK80Q%>rNrsgCKOApS~M!EU+9zLg6za`btP)F)=hi7`n z3;`Tr2#Tu!M=E1L-&k2g*`bk@nRGhJJirh64-fN8AFnfV3dZdhV&vQLtcuxU4svVo zQARP)Trk=vY>jd_lNm4EJ-YPgmzV#v;7HF^gR55c1U0K)yIfay`8DBEi(0olJ9{~N zmps*$L_O-J+7d9IlUoo=NIi?TbX~=G266Qz{8aMJ=lWYKq%CAyyoDNb1K0SmRj8$1k*FybkU_NJ-XBs_ZasVDc&vU-&_M zskSk(#ZNfcGJ2e_V0=+a3B-Un=h2lVvW^T>mPw!R*78PbZS8?;5tdts$+@5cUsicL z@YY9;NOR?ltN$FoT5$kl>Iz?8tg$b)N?l>9FKV!K9_hJE3u4Qmx9t%szb03e2{ivL}>S*I9+LU!IlYtz?+;C0Lu3PaNTWYVVV0>1mC=M; zc+h^kup*{*eO1l6VaYZ&$-~xGS8p61bI>!jwnt`7c96Aw&ZGgk!_zv-xku;E^M0za zCzlsx%|1M?Wc=YdSv{7XennNdxGp)guqOKF%xOi5wak-L3aH-$)YVkp!6)ae(22ka z=4>|lD}mz>tkM6`(%_R)8?t3PW#f*Hnz(mdeEhh*6Q>@o!-bUkZoNjOd2~n{H8W~p zV)^S;y;pu%=U?~X%HB2aHKf(1q>Y3rnKi#5jp7IOqyF3mVz%C*=wc0+DRhJZj0q9+ z2lVHQ@Dnw8ATv}ZgV}8IL`O1Xn-`)FZr8U<9J!&Qdd=`88|&oZYikB>7@2s`JF_;o zWL&nhO;A=%W**$m=Vif&X`|NJ@CaRq7pBFpjKCCcCV({dF@-?RkJjI&Cvvy~Ga0lt}Vt-{yA==C=G z6(Dq7(Jiv2sn_Na9aFrlRFUrSy#jCR_la+h*-~s_u}UU}^iS0-T7=oDka`KfVBAjd z;U+M>f%#FeTy?k6lqhZl?h4GM=vmu zJohqC?mFX3?o?tp@Ge#goxtgM1}>An3u)nG_VD7I@fw)3nN2U}6Yc0Nl?9ChLJx+Q zXJ(8n3fc|OA5gYpxIUi$xK~X`T=fhp%PF}HW!j}3XTM)zE67bvouwc{ z;6Z zS(b9Z%!&amJRrH%(6Z#!(MCHf>!8p>>LUGN_dY3zm0l_*Ek-~|agsv%@uTm^;KwI; z@pU~`Wpgl4NpIGQ&ED3jFhU0qF+NB8umGCO1mL0wX%P+|wnJA(KG?^{)fzwrC7Pzt zfqHcCVXQzS{BM7 z4LulJla*Me^%e%TY!Krl8{>qREUDIq@SpUmrW5Yr*YCB)+~9%BtM#$`$pKYe{d4e! 
zAtTP2`t7E*b>#ojV`gQcH_z)W&~As%U-3>TZh7ZPH~xZmbf4--v`K9``~4jptaWNR zlcY@eJYi2J0fa!mQQ&}c8M#$SdgH#4y8YAA)2Hp#>Gn-aZ}Q6@ou4-o~{SQnircd9eYxs*$l=PXCa?xSh&w*08aR2Fk8s(6v)S;`bvOC2gTJs{TEy{j- zOMP6!rt0c-by^#1ZQX{N!JF#i(a*Zvl1bg1twXvE&CDB-C%pGO76yP8 z%z!K!QXHnOhpx)-iC@k7=>qv}E4Q?iL4db%7L$(3pxN2vNmeL~dJ>Rn5B5TE?dqO|7)MKCmBKb^DiZvV=>%w8rSCTb_kp!Y9OSxof&k49);2YC zO&W?*w27o5spUs87UT9k_?~-y(BQ43d(0@)GYuuK54pXEPqVl@~(uv7m5PPq&($#iHg^BOAj6=hd_u*rPFopGE#W`WY z4yr@>V@#*fkEg6aIjlhQpjdp|39X?ym?4B%-MfED+DPgyJbLsMsg*W?;f*Ep^>_7G z$VzD#-wB&*09GziuCNpJI>93XdT@17!3Jz6*V$P>MPbmQ6;q3u83SsyQ9X&qkl@mJ zp9Gzdy4!{24e#cXt|S(|isZhDE=!18=qyRsAe?&a)ups&q>Z?e2;TW=;`){YAEx6) zbHFpC0SLR%u2DL6(AqMaQBG|}ITrRMQ$h4xgHhPsa*uzgj~K~+Jhn*hv3m7t!K)=_ zuCQwjOe$Du(o%ktJP~rfGsa`(VQFR6p@X9nfh8D@=m@+z%%g&E65Q-?%teON{H&-b zCk4|y;aG{J4j-DX#E(~x>tDEI(a1q5>vpK;u1g6l&0VykP(47NsOx-W(z(H}mQQS~ zJ2!Oyfx^#zQ9Z6B5&*X1I;vii*GuIU}F$yRR(={+v1@4TGo;;@MP0Kt;v&#j5iOb>|&>z~YbpFKFb zTS|CXR5@U=Kw3qr={pJGnXuVS6xd%XB;@K@nxUj|LeLz>D29s|-v(uyyh>@6sCwKi z%MVOS3Jip@Cm@5*8rdb-{RDo45U#V#G-z~R|2M->sFngBX z9XSSanT&Y3$aFTgwmi=gdRWZJ?7gj^*_*UDxzLOXJ`(DxF*(QV9TbN={#~?`mg&n8 z$UfjgVq)I)?dsq%`sCCZ2P%)I)h4A5Pu@VzZykLa$LOBb z!6{Rqnh!PHk_`}l26n2U3x!~<&Cm|uhGng*9OkIxqaZk?Kfx5y7^Xl`CB*nm(R{S`}o7NbI(ZI@gW4BoqHBs z+YWH;#Apy}qnVu-q;;_dN-G>yDhnm`P&qi;=qw#tGj%9h0Mut`W~Wi|(dboYDlZ6u z-+}tWghMUw3u!IylOd%3Us8`r(tR@0W1^JL*Bs@)oiatbI(hOH!KLMGp=ZmDXrZUR zcgrKZ6`ALjgCpdddSknX&=3-reDxZ2??8QXpu8c!si{f&1@GE>LvO`D#4`gV2XO-O zQA4m#fTz}}I2ns>14<*qLhZb5dZc?eJ1`-tVQd=Q3_;lc>MbT<7=Q5|N1Ft0o9FpM z#iNsai{@9?y)`Zg89@zeu8eKUnmcId>=duOc|#lC9iN;$=5^^_aczWG_XWf2-yVy* zcGs65T-C2BVbHkv@;O;P1#_$F-W{(^XxLagHlidhBr87J%QIu(tkU8+6KYJv$W6LvarCxZ%p8O)M-6p$`e8| zVxv4gv&!d`b{p3-7%-|5f98LZoq%+7rJh1dOFI=<7WgN4N1N=w&TY=C%r+d^VKy6O z15D;ySF;%>aCnD$ZlmWGsc#Y z2M%Ino_bR*aOb}e3sRpn;i8Kp4UUMuKArq{;q21gYlf}s)ob0bHG7s0EJ^Jj9Na&( zq)bm3eSGz5Ny)2UF?ty_;y5A8mJ#ydh!G!ROOR!f;rIx?_l6CX?~WQZ>fOrq8wT}G z9g{mcZ%k_M?`Pe9yS)7E+p}ihd~4vqw{FhfMC?zG9C=#0ziE?n|BOy|hS*d2xs2Z< z*aO>K@orG}yT^F(ii_>KM0%j_E;@YP!`#A>*ftTPaityF@>-^iMtZBU_3! zIeLs?Dj6>v7Q7k%cie|01f=6pH!Dq6Fy*DnC=nlSIVL1G3Eo?`=1NHWd zW41j(ZE5gpM{3)9wQtu^1P3N|U2Sb6Ba3b9>~e~objdkv@@Z-xn|ms%r;Hb`NuJ-QbE)h8y z-ChJ?2)9_~Dg?`Dp2|}>xk9kMLiRemU_3WNILc1}M%h94x7+JvbJR^_sPrkceh{Zo zXqf`p(F}0_6cEjf%+VPcy7Y8iM#kvO$@x)HIXO{L`QkC;bJPE0GBU9>=PK@qOn z_}r|G*|W>~H>QX5j^81)Z>%MD{;82Jp-FkM*<-WfhUP?;lteBCbdvcc{5rJe2L2ML zwdv4~Z~xaxcjAaR&L`V<6Nwrbb_O<1J%oEsGJcdoYd-QlXB^&7L4)C zB%Vh0g~+Bi9lbNU_MTmqH?CKQ&ZB)23xfNN(z@$24jiEVzDIgqcXUyAb+2^dT{At? 
zJ$mq5e)%Im+Rt=!A_Y>{hEwjNp)=#c81xm+T?QhGh2BES!9JU=j2wAo6LH`zCVV+( z_Ls={!`tlnS~5Y%fouVWTY`-|EmM}q88gb`_)9FMJQ?zt8!hFN?TGL=aSr>ets0$p z4kk@_IK!Y(8ig^1+E>k_HM{_;XW7IJROgM%RRre-`DDdJTIKZ#$&dCN?i}dr?dOE> z6gP=qM}EQ>x65FOo_G51aXZvD5IzY}~zDP=n^^8W#I~bkT~S zQ_^@m6;dyW85HJb(u*i?f<|Nff} z6egC(#FQr%CeQ=6PU32~HNpg8FSznDDDVoh3_C;8>(~WEwp8|xs3zA9m-*G4nk>i8 zmljCrh~qD?I|^{eNp{C_tb?||9oG&2!kr58jo``dXuusm$_}B9<>LmY1@#=9kuz8mI3Q!> z>wWugA3tE-tT^BNzGQ^iG{O2+?60Eei+L2$dmGLk`vJ7$!nEx;S62&#vy%e8)-EBgakGyUA|qeccleCJH|K z`TWQFPO|ooZUINickGbnu3jx`N5jnerK?x-BM@{1jYs;DDAspXS%X;a^FT9KkRNBKR0?h)1QX2_pk1cfjkJJVzrH#@tl3Y70F>zL{bcZDgEf{fh(!`@9R`r;ip7~M%D40e!hshn0?*TuH zZ~G_<_}ndsu03^vp*^Gi2mxgd%EA>!!C_ub)*6=ptf&i`F?6W$B>h0_kR^TVwpFUb z)tN(vW)6C{A!_^}cYohmncXV_=@tJd|M1??8I=)Y-^c$D5uvS*$zCzKV0dA7O?01v z{%%p^rur z?wXn5+?f-)x@Nd$*m=RbnU|Dg4meZS4Xh}ZUEPYaPvXvM^AH`7naV5?N{7f-HtLJU zym?{{1-i17NE!+>#gd>sG1nlA1>OrQ&*(NH;Xu;J4dr9s8?-=r+-Gb;(16USkptsN zdHXT@r|LEp#g6>P2>$w{e>SGl#U8b^XrU^JVWd@;&Q4kV*d zXioODOJ^1*jjM=DtXtN5@J?cJc#?;-bTFyI>}PRrV<*moIHMYz(|$K6 zta}xxkM8P;aQhQxov4jzg@`GfG(cPkS23u?oy;-INb^y!KX8D=g=)2-!O6)jQtMhQ zYHv-zin1m~m&S}TB#t4Hz;TE*LJu;gMHzWXYlO5FaDNTO2w&-kPU2VTIb%qsF+IKU zZ$dI+)1#v@t-kMz1ZD(o1>!PYnC?R}-w@jku$C8EHWd~NQC$fdTNXyX~6C5j9CJj z@HTk@Gzvu0=h#sL2dl}92pMNT%4acJX;ZsZjSd?Qkgt!$vf z*;<*>pD2@lqM1VkQZJr2ifrDa6=DK%aF2=ukBN&HFP3&ZlZQ)Vr@fdD8nIJzY#FFU zC#+YKF^J6cS9C|)>Z?XSY!nJ>q&!>VA#)s;B~!QZaBl(=&=}0^*(6NU|NEy7`^Z9f zsc;s!=xI;#Wvzivw!9Tb$BLn{kum|n)VCCsjEQY;U5OYKNbldyH-p^1VU z4an1Hwo19;&nDI8QEN+g_bI!3I zORK%2Je3Zqc7cfg-5#|&Fj-gE9iP5vuYUXh+k`%GnEqPJIK zpIBszbTWm4<@~5)i@JwaZLV+8H>@vESZ$EY{fiR?F1IGJLtOQ2coMNLTww{F+PBpg zs?oZK1PA+tchT^=u4+FEoyyVC#my4yGXy{o`o05J1_37kD;mmR+C&OgsHk9y(G>@# z5LdK{CI}<^Kt(?#EIPAhg^pjIzcMX#MUP(dDl`$5^Llk(k&?DNfAX2-1qI7K6@PA< zsxSM<)?%ZLT`bZxrL%Q!jEuFj*{D$V+4S{H5<2tiO?@6+G-F4bT>c7-ii2$T=d^eu zy;|#p)wx*-NYF_ravBh|2+s*S_-J_QlwBi7?U|C2GG(_;w|h$J!A?01 z`S}ewojP?JnV;8?+ma+FYfU>%4q?JlhkUwNh!NVDUf9 zJ4IdN2UsK0E}tQB2j57I$=lfq7ZqQAI)6!(R|M9)WGDj?z2ADPaP`nmS)E**G#>HY z{SYO7{Fss1a?pL>L{YX#hwLdE%cU||$beKGs0Vux$TrAhkUQfIjTsucOfK)*mBY^d zx=4EpJ|?dmV$EJx7RifxG3!En_-4;-Q+t$l6`r;)_ARcCG};$5%N+K_q%3M*$YZAM z8#qThQSbXdSQu23`9VA=zK0QbAqxOeZ$GH|*Z{}ARgVEY;V(A!{6%ybvGaqPDWn~(Mu9vYbyOlK9ZYu^Uez|<%G?QG`t zek@jRGNffwHx}<-RXs9L8vb;;ww!)`B+>_=;4Kbq1( zOfGe1=fDC`Gs}mu1EXC8OWDK@uoVi&_~rCB7{3(U^qZq}eG{s>byer-a_d%x=Y%(j z1&kYxJpt2%al_Sa*)MfZ81`~OV8P476B#!|pSFv)1f(#-58=EYd*O>jXtba%yl@)* z!c;q{eANoyNGIOzkF_gTtbTyBUj4Z}5M}40zgcACWgI=q7O?&Q9-)wtL_uhSB#DC-=Fp8C*%r#056^8 zF2Ry6%Ed&ru|pQM26_6ndSiHQg1DDg%L|d8hc^@KaTOaKtc^{xhfP)tHDlU3rYFQ! zBo3I&J_e;|G%2*mrp1-TOza(38acXeU{*$ONLCgdS-8}lECc^ip>_+qMwW%Zl+`Mj zy2dDpDVv6}P!iECpnJG)fOmwQ(k(x}us%{38{07>KhV32lf^_kkBYLWVU?&Aepew` zb_aciyhby`%28|Qp6w`Ww8i#6t`vII&xLS3c(>8v<;uXwu*ej$VJ0;H!IZd)L``Z$ ztiONkl-SCIfRwnTF8*<{JJvZP5~2 zk8Nztl_&ZUc-Od{>AqH9bqac|AMwOlZ7{UV$9>&pcSHsEdJAI59MPAeg=O;5@P63u zb>_K7*xBM*-bLT^=W1UM<*pxvM;!EQYy< zXZYu3SO6X^YVoE)F8k30ONG!#H$egQuv%Gel13uiq;uaAY30Ln;!R>DJtW}CL?0t< zm7EM?+WMn2+CWerW5)D{<|roN4v0UI_3~u2X0?k08k#-z!hxuFN8`5wiF(3x;?Kei z`8=FaOCQ-YPHt|t6j;c_peW^j#^j~KM9bPV**2_7-V6QbU@6wz2R_OiW|d7N7SbPn z&qP?k4NU%ZaD(xT8|;E56bj2j2EL1zrszcGk2AU%pcrEbVk*}3ZKUha(9@2v5=Z0A zxH=kk6MdmH%tIR+8Be1>M&*U=y#endvZJCkL`@x~y#D41zIp=LA@TBweERCg7i3)? 
zU1X+7Xyc;-EJQ)=HIU(SpAw^A6SarXm5fh$HcdKQ3hI=-kZ)5ss&5gJUP$v00#>c+ z{NhRnTa|@(o3yTeLdpRJM(YA`_~d}Jt#IH^Nzvzcrl z9pF(_>HBl;y)%>Ed+(Fpd+&`@QV1cW0U?znLqg~T2uSac01^oTBB(S46A+0A>RPs` zt7}Q<#?&oOUt5%82&gQvUSBv)p$RSNQ#~w~ygp7pvvLzM=&dMM+`S zs4!VQD~TIyEwkFmbxpWb18c#y81u-+JTkTOFq0uS#)L-OP>*gz(>G8UifyKpDSd?x z1A8IaPqfU4L`4Xh6NUWxiF$60=CvO3x#-$6JUlG77#`L{+iAjgax=J`ofrogA&i!! zY!{4UWo?a$B0aUMi;IDwFSUB#X7t3qVjO0*MIwSP;h87|Zz(7Th5+rIWmpir7a#uQ zx_1xG+(XFI&#d{?!SY?=gn0JE`qSm1bvx^xTXVW1q<#;v9zR#S`;BGC{aJH{%XH&G$Kppnrq}7-u-4$5nRGY9;Dvh1=)XoG2SCri0xt6pf zDCz>HLG4N@nUc+o&PS+!<%syVv(9Ti7_PdW(7I=q^NP`h;$^J5=DW*xH_X`c-e%53 z^WD5dtZXOXFZlP8o=B{ZKw?oDCx{-e_=y@$-XyNu6lc4 zY2Lua*7S}dfAK@|#hvryuiX1Ry%&}xB`v$q+tYt*dztZe<25(GU8COd)zdXKPk*&T zz53gmYfQGAlx@GIc^XHVp5^&K;a&>Qf(@*(G}O`=2P+~I5}Yjz)xNsyZp;|vVu+71 zvz5QEP6iHYZJi7Pk~$Z(&UBw2+cw-(w5mSSU*9mXY2Up0N7`e@+%no`C7- zwdNJHWx8>ZM~rNp%?!+ywyue8A^DMxcMn$t&t7w^p={~Pn{o@cz5JMU;l`%8 z#DxPn>3Lf^(=wMFCLd0G6CCeuEO1+Fj67#7tKfX8R2>ofg(B>T6Dq)5Qca>{J}Q-y zQ>Zg(K2_l~o zwF)krPimfCQoK5Y1n-?0R~~E?UY)b^>Rfm!n75*4u!^M34NdmSgOj2&&(puCExkaU z?=5^kXG5-|^AqO&?n!=D!J%PxVMl)Rm`(Ti&Jxrrb=&m4vd&uWP|~cKZBw|ZB{8vO zQ=v`j%GNyNq}I)NCt%t0e?^w0D>^d47b$a3PUd-=r~swf2&G@rTR|vEyq;n(NsNeX z>{P%Wxx~G+9vWTNAbZ6K&iLcE%evz|v*x9gF3otcXx&IzXf&+8SUPMNuDX8PSLZ%s@9hI29_@?$ZW~=&i0?t7*p6= z>=#(RBp;!akQx4-2?2I^mkzO?e-*sU7dvE~s&CB&BJ)9VFH+YCBU@`iRb_067CeMy zJ0cXMV#)J}*b;CLzN=guDQt_e@JaGtm*1O_Q0mp_8er>|5nVm2FW)_O#eDG)?>zB^ z{M=HpvgX*SrqJvN4}-=OdyDh0f7(ahMK%caM>c5v zkxqzf8rji$83gX&k~Bv=oyP4TQ9+qgy^%OIRMVFHkCsL8olnfC{>M@3b1d%4ai8XM z^sPP8d*_TWGk<=9LqhXo)c3jqR7<@^(C>k<jza9RBrVqmR8i+#C_nJp3+x{Pp1lVS~v_pX-i^sotk9 z^u067zohp-wMtceptr=I+B!Gx{*x=v+RF+$$*J}-<2I9p12@K|77b(~-~G2Sv6lM$ z0i{U{3I;ft4{(bUpdAayigrl_286&1oSR&xHbq)C%XnwOGnVrXb;g!J{kA2{q|ptc zB0Sv$_|U+?9K{dbCMVX3qp>p+d`-w16DJQleQTw{Fu*oxR=T_R&Jt7i02e(MePWu( zt0q1aij2rJcEPE>cTT&j%n2D1^qgnS;_Zm5PoO0sJLF1A8%^R8{xtT<1sV!sd#>iD zrta<$az`cxY1)x=29a4lc?juwghbZH_juj$1b!r?Ze%4egF;8#$sE7k!FxubZ_CCM z@1m8nw(nj%wlO!e|3YU-ZN*!=x34@~Q_`Q;IdgF`NhtMfa|v~F&4`Vkcf2pH{NOL; zNA?+*8AMfthn2>=_C!=K%|^XSuz`iCg`y5Kb`8tOPBBe^OrzA_zdrz$u{022I@o{Zr|UGvE%fIDMmq?mI7% zd1GAu{TqJpAmh{1wjDII2T?UQ$mkGy)?EA)s6(N$Sqi5?qX4g#;^R};v&w}Fo5Gfv zXPw`Zy>etu&$GSp(XEH)H>1?a5Uj;22c=EzS?(_Btp(ZhlAQ_ilD=4bV<4mA@Voui z>X)|_WUjrmn0R}ktSN56#zN~^hnHk#b*)dDaK+pfh+FvIV{SpnfOgcEGw01)j73rD zDe+Hw@~{N=?Io)$NancYW!E~+MR%0aBFy69CB>`qNPMYhqpQD-TV~9W54uv8ofC>` z&aTSnNM%*N)jONrtb;0MMyeZ}%RQrGRW3dbMxL90v5l1j(;cGnZ6$WdP8KW+r+T%& zA{<^^E3F)$>&0koxlRCX11<$5t0)}8CcH)?L7 zcCCASR|)SOWD>CT`8J#RFYYRzUcWYDQ)^;k>n8G=RFeUu3;wr^j%bywf!x5H(hv4l zn;N2}pa-aj|LsH{sTp++g-T=Y7hDxm&=D`hA2=}aZgjl0nZbH@%fx|p?949oUimkk z??P9=Olc3!W6vHgQH)kugV+X^6{@KwqwKpzU1IVB@;hhvaM=+zTq51#mRwjqXYaxo zAzsr}(w*%#WA(wpO+>yT`SF*!7XSLmx~PuRy%?`b^pI0d$Kulga=AU~WjstVBF(U3 z7AKVnELJdOH2h987L=0a7|m2ks90P_O81O;XEzpPAN>C8nZNDJj8~V>PssP=gPWdS z@$~DniSLX>d2SvUGAesPY{~AMJ)LhpQB!yP*NeNZABh`??B9|Jy{ur>wZ^hHI32 z#=d`^+x~}Rm7Fk&z^G`q=6pwZLI2^Rjf7jCbofeV@4F}Jau0kxLjLsQHyD38GDaNO z_$lBUM#R>@z%SZWp^nyW3$+fgMpHLKYot)^%7^ogfElmN1!Crl30NrP8`eDpB^0$3viw@5WtKT|X)41=gmei%ET1X)$p0C}Qle(da8;&iFviB~_ zvS>KDz)Z&9MGB_SDSrvv!UlEduoBGl^<8~N>dt<;s&nI`ICEAS)^xztGD24X6@r&k?D{iH}Q1QRCC<#HQ)Y<<4(GGn#hGgQ)8D>%y0(i#u0SG z1I<5+TKYb6-pMosF~~$y$}>z^4r82T=u^#$N*;-dr~(AVJVL9Z^Y>E`{mVTnQaIiY z6_RgwCHdG#RxBy5+*m_NsU*}lF~HGSD4j^VkvNk#b__`L4K2~qQc+)dl)X=i5I;0T zsj23fy^E44IsJHt5>sGFdw=!=)O6>^>9oYl0`C4xegPV#~Vhm&0v_jNEpTGU|CQK1q~8I@r~k=q6gN6ANz7u%E~J%xymIu zZuz_0Q&umtxz4?L=K#0mDQgd_wzu~-pIDqU=wH;HR<|zG!kuR}J=`)lt5n;h*lRp= za712sj?;Ixx8l`)fuPZ>Sa&sWF;gqB+Cee#La{*Uzo}6~)l|aMOt| 
z`c21yq1Qz*^qk|XmlZAFGlLI4azq@_0_p8KczY5^Z}YXDJ(Y*o)kPNWp)xKUA&La7 z7{zD^W)W(4cl}@+8^GM_so|mV)TSd)|Btk4lK+D@f%_vWCRqSZ6Xq>t@PPu{!D;RR zUy;7+?n?K>`mGI-zOkioUe*fkZ<;P%k8D5skvett<)xa>#z|euGYs*1`K&UH#aP< z!Wxb*8%s+aE{?=MSRMtXW)}N@#9W0r6F<6+3+@k}8y^Q(TOZ5ne4_2@%DIm(PQ`?N z$X}L~wImPoNrvC(U96)u^BiHP?TMYj+}zH|nNtJ%ipgT;%mgQ+s25U_0`=?_ST?xFE5+y(J>Ne&Ler!=EgfxG~QC^3EW6y?afhfrY)<6Q=eS2D5J-=f9-0 zk*MsPcu+3G=+u*l{7`)b;napass^Y?vcQRkOj=BL2LHS<)VWuzFD2Zf7^z*$AXu&OF8N;V|}qS7H1C&(daZKCt(KJ6y?o zv6>%ar(e8(rsZz4yqEh3qZk$c>^;QBI=4{|Rb z*R12cu4}5tT@*gCxie=6RV6!QWJKu0p{3Dqyha4fwB_(H+w)5Uzk6|aONPl2r`D(c zbrS322)}(aTPcb^LHl7~hBoHxqTym*VViIcM_j%lr+^58{{$8c3|S7pqrrj@&`sR&Q@leYL3}gO3Y;OAGlZ*-!U5 zr6klMNwI>zb&7_iLqTLNM^A3yvnLmAO3vKTRlccYylB(K`MuXRd%Sd4{i1`V0c9&nPjXjQBrnR%TAY>AnLBrXOKep0p{5xF^W!SQ>$lIL z*xz9bk9L6uvX&KO%uA(K0bYsGHLEn2%M=_Cv;^sFQ@UpUcI>t}q3{f9(Db+_;`((` zBw%eMAVJD2LP6F4*3uKTw2a`3*HhCH7Ceq_5X->O$~$@xBkplGp;Xhvr;ck94_ z6J4~p!Y(+g6q=J11%;HeT ztLx_-RVR1cT;GUqWWkRgD40+6tT-YcV&{f>BZ0b!0E!?0@{<)^NLztZjMM8GCD(E+ z`H;o4J0})Uo^?!fm((3QrsG?5wc$t--oO(n=LPSxrCb+6OJcfId!%-n*7|{ynV~bG z8i6X}gX8HN=QCy-*08WQ=g=oh`0jBy>czP3e(+ic(_vEw@Y-9)$!iGNfF93*Jt2w( zu7tVE^-=j|h@n`FL`or)#$rN{PX630*2CP`0F@d_rS2+-|1nbd!&v3xOuBb8kbPhU(8y!*L2F z)!D~KDA1e{k6bsKv1diYj+!91)VfuLUE61vUN`Ok;>2Sg94sw8@XP+QU$5c4hlj<} z3-=V3?zq~vXYLbgszXcmHtqUoW&MTQ2X`I(@qGOnYL>_@BkSlRHa{hzSrL#2mLbS` zHSw}X*b?P0_9$DDG-9nvOsy=s@>CJgMqB?EvMO0R&Y6lQ<(<>;L>~sQiW7hx(kvJn zGX2SrqAVUKEWpOvpt=qIc%UH2Wco%%_7JZ$P)a=l0N$g{K)S2IOkma_llrJ2qz*H8 zIYcf0OL1Y{@aa{59wxsU7yo{ABxhSQ`WRjohusgYwIB+Y7fcQW|WMPsq0^9f|!;ktGhv+6Hoc;q%(vC zIg&ZXW3@Jv6zAYGh7uR9&8pnn5@tA*v+iPhQCGr8ttWeubWS1*M?>$mFBCVePo3f| zQkXX`PIfc3DBSkhiJF>GVs)*{+SD^qbj=^Q*m5t{VILaqRDGy!u=a4ZO@S2bD6q*~ zUJL&tCxdw+_`w>aHASe}=ZIr~pPw(mf2PJ}3bl)~vnlfhGmTHGkAlQy+P@B~6*599 z!IebGL-EMb!)LR2gd-l6i{stX8`8cRB_|i2=uP%5Qs?K2)ior)uqh&=aCt%N>Eu&)CloNM=c1e$_!9wzB!=Ei-^cm?TbbFM|wd|p$g~yFa%#N z-b2c0P%t<{o#Pb5%;$8;9tq8nIgWrGoeQAS)KLMfz|AFkFK}1YbCADes?}<&nBX%D+z98QEnw%xaNBK~(N&J)Z zhs4f7yxB*dBfsJ1iK$OoiZ)yq&6I~v?#Pqmx-Mm&lU2d1_$=aJ&gdaS^+;EaOGvN^ z4@N1HsUa|(aPoBQ?GbvU)=7hdF-4EW`Mjq`!Upo>++TaKu-Ye}wm7vpX|(&LuG;<0 zF_Dd1#oxcyuy1}uQrF?ymRtR!$(?yc%~7HA3SZ>pYE^cK|BR5J!q9m;i~Ndv3u78* z_-A|PwqzD8D-X;a>T2&kOHD&Nx}K_M(ppT;Ne(;zFzKgY&IV^q(0 z^VZld>xsYMqu>MdDgW0R1$SMILggsGMsruxIK4~S97J^=hxxD-pIW3q{R1+@VW@XT ziU&|F)j-lx83Ft^EtRj=e9t{Ms`+%xN$DC@yeK!`Hz3L1(G>J0#R4=xEE^ptQ$5M^ z9vQ8B8t6OW{L9NQ9%^2Z8;JZ9@KpuHTr%XfkW18Vp?q}#jSB-%mWl=t9(RXyT@YRR zMYjvlqx_w*ANWlAI`{tRR*Y!hhGnXdM;0Hl^Ump#ULQ^+X#p#uk~Au=CmXe~IGHb84hM?QZwY(C_5EAm{- z&1Xg=8>%Ce>MEIfmbN}gO4HEJO7f0zDsu+#rz^^Y5}<{S^F|~At^kA*%vrP%imKL? 
z9b*nQOT(R5dU;Fuygl{BOUuIa^~1`R6c??kOK}?oBIK;t#Nt2$qu`>1?0G59cb=!Z zLs8-4l28Nv(30Mw`aSa^#vBvsvlEMgjSPZ{lCm2U90``$fCgp?17?jVl_<{kSl{WMTKHS!SY$blUWolp$=SwW0(t)DDK=< z9{o=}l`KcZmxXm$UnMMKnlnatB#g-nXpS!`yuq8bd$kyEM3veYrtKdYCEtzpTb$UA zfMqRz?9QzrQAN^*@VsX1*Gobo_MYLbn_B7X6L{Mt98s-?pN2iRM2OGR11tv5)MjAt zFO4pbUA!j0|7z1{bn}edHNDb?il#%>?v}G(*g_J;SF5|CEZrntA*Z<#Q?+{>2Z>z4 za4C7fl9<%PG2?AxWb5edm!v66)21u1M|@e)fhWn~SDV!C4XAdHX;Uw&6v%j}cCYBb z8r*4_e73X<{4x6Udn34jTU=)}tkdqD1VQrRviWN3Nhm>sm>=n%`%&~JXz0BD`MHs> z?_XCU0^$w&{kAzzESRyr5iy&I+W4CAu-X*79pxNrF!mJv-cE*wDU)mU z)AW1qd*eoqV%&Ov|9U!Z^(c4ifnhl#c6hN%7@Zf{=uE$V$jEr-N7)sR81o?m;ysbm z#D9*MBt>V!E~0Y=_1yxzPo_Uj0Af69JA3Nl5;}ceE9&4?ralv@RdaetojK>1UT;^Q zTb7)%>}>b4mpjg)2MH35`AgAt$qkdh#DkU1=Q|Oy%n1Seu=LgXB%RIhx=6lwt_1YY2AmHAcJ6bX|#s zg{390FHm?org2K1W~R|o!Nc?|=A59~JZhSy5I<>tU3~3z@-BIGRQ$epez1Ra4k;6# zox5v(WK7FJPS8xyqs||XDh~=QjT{P}y(}kZX>}Oz!T*dZxX-}Hsn0tQ-XY|GgogM6 zz|2~vwlV-lu>{TsM#bVzU?w!q#F|7h-w2_+=0WgQy1~597%f(ab}=>0HVsH}6Wtz$ z%uDh$>&Y$6=TpiU6tf=**2}$ul1+|g%OVa-cRYY{RRLAPoUyx|Qq59UBuBSS0un?D z{s%(#WzGBKD#cklEAN}cSzAU?vd;n~dj>_xcFtJLHG>H_{SSaO9w$Vm*oYmKq6o-c zvXlxvkzu63WPLwf(hoMLpS zM5Z%lZOtMFJPRaDY8G_1Oy7n8khm#3LyzDvS`8+QWIu$bj&UPUYglr|!=}UoUr9EJ z0Zp-7j9r|yDA49oVvdd`A5DP|$ATvHn)Uw$51M;TLa8_+KF+jd9x@QVG?_vH=|@i{ zYd1pACA2}PS2}ML`5yZ6DJr-p4iAZfd{`@>IN9TfH7;R3pioe&NuaB~+6ol-) z0VPO4mfeEWMWr^*G?m)aMS{FZJbyB_d+#dm^LS$Wo>{Z@btJvw8dn&c)0x8U9A9*8 z82}gKANNQq4@+N&Sk|3STZWq>Bb$d?1dltPrZ=X$#rIUvAGckpx$i2K)oX!D^&FEY zVbV+uF;hb}Q;BN-uQLU|=f)01HLNcxTHg>o<`9zNlUN(WC5$$nY0qC(6LG*PGTYxT zJJK1n>RY?LHaHkxxDPbGvE?yNVe@9tA6K6&n|jqLraTsNz55a9V?tpAJ;;&TuEqvH zHZgE;bTlv|YJ*9&no=y|1QLycc`=nmmwW2YPMaq?KQ+$}u;Zf>zNAN!$$edavaRLJ z0)@@(bt&^>q84UxYd{{n?8Up^C`M4DOKp`{5XP|QyPOwLCx-Z7Sx=*ybzti?mRutr-32ud_;pV7S}A=BVn3(`zrUY_E?@JK z#6t5c)vVEg=F;d-;iCK{rr-$FIuQUR0~t^K0_K=C0Cv!f4u_GJ8<@&baKO?ESm1(j zC#*F!&{+8e4SG@hHSU6#c#AV$K{~|@`EpLUt4StTUy@thM*B@HkJCN`jjEq~GD7nq zr@~ki$JvY$xX8Yvnu0 z)sHpLl65a~8Vzh(0IF+95}XU;Lqk5|0s@zfLiRI!`2p{VC?$JO>c5XnbWiKtMtI^rmhyDZGwihdiD754oMNWq=*3aJF!NJSX)z#C}(idqAmc+>m^*SmQ z(a{W@8l9Q$zsbsL#w;)`8kz=fj5XsD%<|Xaa+31#o)3q}?+9TEaqbZI-Lk_vqxcBH2$8Q2Zod;Qm) z5x$-1v>)ewf&CdIUmW~!@pZaC&U>cpkGSmK{*3d1gM-`vrSjV)KYg#1v>$>p0eeN? 
zm1fxZvTBv&R2Od#PtTwrrEg%MW0>|`sawT5G*n+-A@Pp;-&NW`?fGRY)lc3<;dc`* zRioVTpSqV^4Q!N8>8ZJM>nH9jFDntV`P0gcpvzQQo-82M%Ffs*HB}Z6sGpUQk)E5@ zpJNN{Atn4%ZBcIuGqV`pkkp_PYR#13I_fv?MO;~A!WOB^4B(SRg=kW{r=_m6cYuU9 zpY6{kI*xLfUd7mN-_I;WxU*?l)ZftLqmL?0z$ji=5@6WXZ^OjyaJe+oAhCskBmOb#& zc%lE>ORptCwNLb>rS_d{+P%HLFWLo7EyQ1M+ueO`SxVZHqqSkBnUimQpvf;`ZkmTr zetXV#)bXJV`i>CqU$QtizA@i#v2SjuTa2HbIO3JvlwPnDtzW0TwtY~Nw|}y~l~8y3 zv>^(oEzx0ZvKrAAR870aSs3}XhP$e!9_7?lfbW=gzfK0EB6UiKUwspj$S_TN6${alz)Y$}RH}^az>whH%6n zwdpbWt6I1<+Lv$&^4jru^-~>Yo)+dFW{aL$9Dk=Xq32Aep|zozi%Hw@t~k64MMsE) z-eV2BhV})rFm!KKIku`)hBBL0dW`QohUp) zX6uX~J+4+6T+o)D)m9Ls&>U3+6|@UQQ=4v}`v$N;h9au_GBf+CBPQO7oVy~2+F?}h zM*IVraW~+5wxpJhiG`lr63GC(+Jc3rSv;L8HptMkRkC;~WIXNq#yv(j_mZ~!U}EGE z;b$eT+%LXDY8v;83bKUvYhIf`PCF*Y+*t}s;5CtA95IiBV1)Isz-dM&B3m>TurW{@ z$+Zvx6ztKGWdiD|lvslSI+oE9r^316&QbQ(a$8d?KMzCk{pG*5Yoe3j`?3-2x#ZY!d+d@=?)oEgWdRJ;SPRw^SR6fEreHW>cOkN`XS zeHAUn0gGpR@TV2J+5pO$J2H)+7>PX$V6mTv<`48b)(+%kjq&HbP^-X??qv;p`-^HH)4Y_ei+(o)3(#74Rli2?x zPm{o`&`#w{b`H{H3uF0LM zM_>P0n>uQPB9i;pEJ|!`TO|$^xO)zVW{b}z)x~IS+)1Oi^)l}Q<2Ny}W8bXGJ?)8) zd!^5g3NDOubBilfyt~FH%qJ?w!fR2xPq>d*72}(us%!EoN>vZ1t)H*CHKQ}%3wAJh zMl7f1acuW#H5Whh#)9GDIQ*!&r9RyqORZyi1GSmGkh#yr(@%j4R>MXl>o>yTuj+`J{$8 zIE19i7q7wkMaAkCvN*;!*M}`+*C5px;ZR6*qxSgMUsKyXV&UP1#AD0l>cPQ?rbEr* zr}#aBn(kkJP0e|?)W&q;K|g{M#C#%3wAlq$j)g{H9jVes`8aUzl|k-$dH{rRGSw!C z(WG%{5hWT#BBQOlwWo3lb=~ueJi=QNCcef85b=dmhLaTZXt#-CsJ(qv+yBLN!b_eL z`FZv}ank!3PJJH*-iM}1z$ftk`##jCGEU!z7cZVt{uJkmJgdEZ&9?uOgN2hV5eLw- z^QO!lb_k*iHWtRb+FK^`B6M3Qmt$Sd7g%en0cj#XaM_{gK=LEm1MsTUt~)mG%;tv3 zlKpS5JpT6qa-Eb6byjSu4R=ebQeVEl=}2PNp=<58AN%6zxl2AefZpc0Yfd#x6m9FR z2+>S%fPX`raUF|0Pz@Wiur@bRs)5AdLFi&mn+1(5Fu66YL<&wcI*lZ%H=*jO8--D4 z(>k+gVU&C3q6?$zhbjlBbUoD8L*;Dk%0MSgLUX8kAe(o~lOdV)&YL33mhyC7uJq>ofXl)UfHiF=m#$)!`Di@(bx-fxSaW)sh=C;mjd^Y{R9-!m&ui!YNMXI7jc z9cNaZAv465Gb`}?i6-IXU6p(Slw*Bafh-6D2zcc3G6-HOKr-B|RWa@=+YdE=JV`hw zazgI4ntatx=QhzLx4kTAN0_B4^<+~w0DwzV6?FQq3FS(iV*i4D~KmA$UnZB*0T*};cW-;RdPvwmc~J0 z*T$ggq6}E-oibS(f<<(iQU?Pv?KjJ5zwy(4n||l?-=^O={VAHI-Q5k1-CaS^QGr~E z8b7PM17jis$E3^nG3e&KA2ru?cSimZ$DLL-=P|OG}w=Qj&N>`-A3l z-4DOi{pqUq2jM&2pB}ge>lGl*BW?0qkT?{pissKcqs9u+g{%l$(4MCXkTOv*a%*pE zrFRtiop{@#QIcGR4k&yKiLtd*YEEo6TwcF#lhN{e@_{iayzO8tUX;&j;7D5Lo%RM! 
zyq-noHk$8C<8L;Y?@v!O5ua7?CS>M>&sL+Ajf3XMQkCFlppScGjJW zd7OA9hoUQ8K&JikN_)Xa_A&5ORI(8gM$(B=W+tq;dlkAR2o6{?LzTpo5K>zy3y2Je zq@Oz?9uzLiU%}_Tl|CmMx%(ITIey*b=ZNletF)h+^!Tcww>rukQjHu89ige%+8P@h zNVP@KQQo0+|NrUgRp0iJT)q5UDH4GG?E=korPwUU{~?@Q#w*Y@$HELsx}>4sgDj*I zAn0%mDK+V21_#p3iEk!di^Sb=Q#zkNbse40zgW`y2mW2Lt%>Xt-+8bBI;ep5=ksL# zTRr$zB4ysGRx&=6aG~A~kSuU8pxvipGTlOg#*pSh-G34P8yJ@RkmPrWFNh!guf~0o z0I_6)xMd+(!WI0427Z$`v7GD}*3%1h&9oyIY6f99^2lFgjkB?VjCEng03F9y!E5ZY zCVgBf19a$@q85w?a$yfXo+0m>_6&uZHC}t*8GOdrB_zJd`$T{8zIa1Qf5S5_h@T28 zv9EHREXWohy;xdI!2x1Tys(079+tqXkTUzE+9<$?Gkl;Y-rxSo;gZC@^SxYYXQm5@ zY&wKy?5v_=;@gp;{qJvD_vW^|y>Tsjs}~$@jixgf|G>Wt%g72@oHWyj5JOuB)RDpl zqmW=UB?N8+I1xZV@Zllj4wc#}>c@i@5-W0<=S&`keEtjj5-W6xgQ9P(zCqIWXhjdt?rZ zdNOUqgNh^}9(Vmsx=Zf#kh?C32joD>5m3yEzv5Yf=Ut8VT(4!H2KGE1gRf_w=FK6| zq%lCp$Fci&wLU`X(+^3t1tz+ok$}EzhJd>@1Ru~->q((8Dg4c3VF+?tDQzI@gvQ*c z&mFm!${6B#9Ayr^`}cERUm)es#+h8y?JG9-_q5^%+~ys zJA7S8*E}Ykb|%mOf;6ABb5F|84GmEbCf<}=ODkZWo;ZtF1#X8-zG}+*P8}7&{VCP; z4sK$Jt4tcAU<2@S0S;l{gPnp_Y=oh`O?Jq0c z_p@dH+<*64YwI=5-e)%c@l5^k?A1@tdrtkzuF{!%Zm9oLxou%gT=UkMU#|Ye{!-Wn zi;x$81c;mW-UyyF`bNy$V~!KN7IFn(DNTDJs7I4m{>%FaD8ai>7dUfCUfz^lX3bk&ud>%!HcZC21*L2ddf=TZ)5J=qB4%d!##=N`QEr%m1*`o2WgP zzhUQ+P)Nf`IpL4H6zufYNOy66y#Fpp=16fwv5%czeehcBx4yBl9x%q`W=50jj^_XA zcpaHm>BN6Ck)x>BRy)0e@=UO}@9ryk;$^Pl%W_*5g+Z@W${)ZRj1A;a1(ZrNfu&F} zqSBr+kTK02y_adpeW6Jt7GvbQ(X}Wf5)#1DZr+!4iB%Z{PlH&8T>M-% zPg#{_tV*4bh`WB&u3fTpmm?rcwepXjtkGiqK&U>+^9BaiKt;9|fUf2!Fgd`g(Mi z(0Dg^BcTOrwkQs?Wo~a>?#9DvgrU(2SH{yfNcF)eSwnsB$4e%5j*pYq)U-JGUcs6e zhY=8+y8D&<8Y+PFfpmoYCb=^q&dv%aCj&2(f`%CAoJ}-@11>0O8g$8N7u1Kf2)XC! zpm{nJa3f_Xa#7{_i`D$;{eK!RE#H4rz46V>SrgH`zW2KpLbtgpIx-^^qk&P9g!2i;W~Mjgod1y)&lS?a>aq)eD%0Q~|yVGnP17uU- zgxcEZOXXLzCY<4PB0 zI+KW|!TBt!h=R^M%7 ztxxmk4WNJ$`)1-dn@PtI zGW*a1st+O{Le(VAzLCb)4n%ESH_vj9ZG}3cX>~m1x}lRg( zA)R&}2>yZ&WZ*B~Nq6zj!7JWwzqt8{n8Jp^XfksH zQ#A^dB?dwl@xP9^XO#S73>_0(qMS`lvy9QTK{TbvF9E1|m zDcL#hwiM3PouGlF1P4O(lj%bV8TQ($PR;^+$tGknCWugaga5^hH3#CLp zA+%>QW5_h9uALQiG*1>2-ao4aoanHUEY<)ZW%tM*m*uU$*jD+}W2K`x{S^(5&S{#wlbdS*NOD>MV?uX0oNS?EvnI{y^A?vH08$q~9mtsTn1c%p&0{L)wv$r?{%$WX^XU=1L`lAMYf4OZL|qk8>t)DeD=gA6jy>@JY2LU zFKF(e2Ag`6?b zNYjXLp)KvWT(VtU+%}MuG|)!suWLS&ur0WMY~P+gFOnmArnaVI7OrUb(EHNECN{DC zaY8bdm8!}r%T$q_-O)R))qxw&zx$PQ>;V4*)_EAOH*q|=eQ{m(DNDS4QzFaG#pqz!-3_{g-jQAHuWw+(4 z5M_6H-qD}q3OAgtYkRy(hoxGz`e=Pk-?hhbq$;2d^J0tpXZ#~(VQFakl7r%ZgqQE6 zrPfE2*ycTR=D#_=;mw6xf4(V`%YPEP5)FNXqNKP1(+?8SSEWMq%OpHQUmb63!jwR~ zBui;z5>b~v0}cWCE!l{?5~Tnzh%Lc6;2&bp;H=K~{`V!X+}Newn(C3axKuny7A_rK zmF5y((S^pYh+EiwmDF8qvEoFdzh_2pVr?oS{T?n?90E8lZ{^Y13-*_B%ZB6B6>&8g zp2Jbi2N%RPmZgQ685P?`+QE(sxt(3%mOV z;VZ}pXH<$o^GZ%ia>T0|`}=!D7^3UAR#1e8u$PtOKbjg%dQVl7@01o5)07f}-W(R*Rw3S25Tng6&-c!4%i`p6-ouR45Z{>CiUc=osN3@nfjCON z`(|b|Wqb3XwOO89kED^Y1HSoT5hWq}3wA9CpP3qJj=&1Qt-mI!aN<|co>_RJu<%$( z*Qw4J;t)SKtspeht1Y8v257fTe4nRToUZWBQp8k~__&lnBOGXVH#b6H?c?33fs5b{ z)p2w?N;C9y7Pqvccu&T98N#16Vrp!E;lWifgFK3C`}kPjj@0Ch9pX2{@`>MeJI7UY zX7YMU-of77#QfydB4Zc(Yc7t8)FqsKO<9Xupe?s-D6yxkq$hQ#_Qg%H4Kvfj%!~{P zJg-7W-Kq9Xensi__D(f(90rQ}(#4wnbKcC%3!KvsJw$b-W#TPZ3KZil0@yythK+S| zb;S-Q2N-B~kD>qK-KdcON5SOQ#pn{Duy1tt9`aTwuGhWQTjCGzRswvqwNm^7$}L4f z!Omq1`htz6v9I@*BWVT>wl6!{$U|$%=X1(h@mgdQyA>W;5_Etnrx7#LLd}SZgKf&S z<*qJmuuF(Cx3tL4GOLR&^#9lNx5`34qHgRKtWZMQ zANo@F*Jn`WNfO#q7z;HNw%m`6!u?LOj+;Gh8w?S12PX8aPweajg2GI8Q)n zt+;b0q$t)UcHyq-dyFIh7;^IvJ>Nck#dEso&#ki>uMt&T3(`YaA~DmJl*>O(4PhWVeyu3Ps?vFR%REvo?Cs6m4b`v-c$%uJP|K0I$t(ih%wR@Q zHzRc?P*RYlrd>}(Fvri;-1HseE{@p-rl?|P(e?Ad`hnwQ_8urNSyL0i&4;V-eP$YH z7cJJ6giT*H;#;*HTEY@&Z^z&@3!w253qu6ry`5QWCoB?`gsiWVwx6ezwuKnHG%d(% 
zt>ojl(Y5a#Dl0qm-rB{_r6;8Bo;7FZ{K&}pJLlA{%j7s`+bfnTj@$IH_1vdE-#`=_ zK7VR%V8B*)&)n4~YsLT6J+&qmOv#;01BTwa};y!32${r14X;APnb z?OAS~dA;k2|BT4k6>;%>2^~ALXARzH_iw*(V3vpLE=QNl?k!pV*;~3ZA#J=MS95W$ zX~zcC?O0hEIn&G%t$$rhR@l7h8+T7P5RTvJCm)U5;J{|Jbi@Lh*M;5C)f_zny&KZ?KJ@MzFOODlnHw6>xN}bXvFhu!Puy%; z{j0+j`8$42GGAWy>h^-7J#Vev{oU^Ca}L$5I$EGAAE+7S&6c^Q&dZ2v$n^!lRoBSE zu(*yVn&xb;53h)uKQN=|P)k(f3#)QFQ-F>YJv-eKT6L&6L;ft&s$m~f?O8xP1=je9GTHud+rMFemBO;wxfBac*-^e9%se6Yc!IiFOfnKkTUuLKs=9umWm<-g!AdVQE)%2Co8qP;2W3zoM9y;BX;qS<;3T zL3!z0OIKnhp@I#`giC^vmuX)8WcjFA>FY#C0eAy3}m$l63 zqM01cyh6fhZ<{Hz11$OE>uD!SK9?r^!&9l-)`sTg?5jL|?RqZj6zg-$@3KQ@KiHOduEpic%BG2#{g5%aQHh}B1GKW#{L3q$$@!W&pXoG{-~ z07L50`MT(H*t3R5x#9^={*wt$`MLj`Q0YU>tt)YUHHKK85Z>ZnZY*`yJ zeEd=^>nbb5n+ppw{rnQbjcrZPCSHyrD(J!N3%*MTGmLndeNVT{6ABfov8mgek|s5A zVDF$}o5HDp7d`qO`A@iqpto@*dVB%QHfO`!xf}Dgdgew)=XiML#6;zIa?SWt)vAn) zm9y^u6PcO0y0UgdE@?>kfIQJ&#M3OH@@)Bj&wmysk2>nkaOo>KzvVM zW`VqUIT9%J(ANmrX%bSsDvIz(5s%{B#XIMSs*lKbh#&GVh>M6Z84&x#4zj@*&#A_9 zOtsGe&(YJPOhgKm^buQYf+{U*Wi6WbNj}F*9LAMs?l5;%eomp}04>9= z)Zm^|>`H?6N+qs5BOk*(@TjnR67Z>$?4Bg;m15lU1bs4xhzq+XpWDanXIH={fL{eP zNNy+hrM!gkSDI1iCEJEg(Zb?fN!`Dom6f<52pA|9^5vnlsZ!(%D;ruTAHnp6CX>(+gPkKn20WG5L> z6iU3yTtP8BrPL3QIE^SU|2%4A`0~1emd&7b=Ian~O2RaXZr zTOU`Kx-7D79DIY$6wf^J;4`&6Uh8F4#kla~2z{tuk@Qf%hN@Wp2lJx9`e! z%Xx=;%AI;_=O1_j3n*L8ZRLKHkAdRs(V5!@DHDdAIW=ihq*w{%h7p!hpIjVG{H8mu zUZ>Yu1pDMIOfBwBJ?)him{ja7mua^0ZsJx_zM~~RWmCH-kdN~!ywes@2~a9-;D!lk zf>MdN3s+Vk&;9S@B(8jhE4y(8neW^gavoQHr@fLQZsZQrD_orRN;K}dN^2IIOes%@4#jccyPjg?A zR?O#dnQX!VGc39L7I#WkgeylTuOKf&_A;&v(<^lMHGd`}ToYpRklto2v$Ri=-fZWQ zPCI4iBa406#=Dpgf3?@S$D{Y`O3Q)gI(<5AYIj_tv{a(`iX72C%}mNu)yhD&=jik> zb_7WoOjxJ=Qhbu3(qvK>jr|ygZ8R?j!R^HS=)yhJov7 zYav(bLpB&mRj`tWmeP3xycopN28sj*9V6@JYat(1k1n~EY?g`o**GK|el1hs)Nogs^e#aYBnonbJQ6k>P6p%}or0w{o{M8H2|BRqufOi;^Y`!T86PFfPrOxI z`xYOodGY4xWCxS5Om!v~HB`Paf5u}Kx4)F1gO9iWXFZ%8yjgkY&6rpVeST1B9F4{% zpunf^{#DPM>CDlP=StZup#t9(bk*cMu%a&bcMwHKctGN2Yz-~Zn~x`!C=9|E1nTe;&&%JsT_%6_b{W~4aejTq^Wv|s zM6D3tiesVr_<8vweyMu$`$-zO*M)CpwfvB4BL*i1P2yF`mn4j>!rh3#w zfauMCMBW4ELGrNt3FhzcfWN>LuoUK2dRBVo1_1DsEUFX4AL=JH_jG4& zbE-a2Gu?#S#)Xvh&j}rKiYbW_Um4?H)ck6cBSP+$wf?av$2L7VWyZG^IDmDy%EMWbK^Lu$r}MjWPF6jqxT6v!f@qGKy=1ATar{d1amC~t@L#E;l05r--totTo-IZrMQM@oKe9Q|9HgLBYgacnASR&GvIB?z1T%9%EohGq z=EWp3r8lM@E8SzDt4(n#N&z4&EwQ%5ANe1HC+dSanda~0U#(lS)3>X||Ac(_+!tMQ z-+P|?^`&5qRj^P1qy2fVb!E=-f{L|y!!57xSU$c;^QsX84HsnYcfS!F+5DtDDRhVu z1m2Vw8uLh^!zsWK0O?~j2n-m+?x32gVra+?1&hE3B~j$P>s-#uWrsf45HdIPYW?2% zk=tl90!^u87ih|=o)jl8*Bk2X&f4_G0Jrplcya^8G9qi({NXz*}qcsqO6$6S|L+F~WERK`22iyq?CE z`>JDGM#@&Tc=9^uaN_s9y}v)N@#GSYWcSa?SyxV5D|WxOcH3($n<+lKfGz-kk*|l1 z;12I)7OII65lB~s=H$eM8mnF9CV6=_COA#hOM+Gx+HY8U*1sAC0qg*+`Y+WBOm?^U zDRzM+)Kbo6Yx;^=vsPz8i=VYJV{2wkNQ$dlYDi8xF{9=HQ|#~8`QMUMS(mJyteKli zsdk^;lO0tSQ5HgV`&k;_pQ5)D8inQC6qe{NV0GEv&W?B)L2y%@kFr=@irCb?L%e6y zy}&TGp8E8-x0;6Lhx;NFkuTw!ax}IHFW)?z+V@lw_tore zjgbLmJ^6kMKm6H9-MQY;UsMP9CHh!8#?Fk@*k-jC`eHwhK{(u!UkAUS9ooaNALap} z!TucYXyFB9ywpifGo5Vgt^njz2fgDtHG%Vy&1 zq4FnHrAdj!D&OLy#1fTw^)1cWvA4J~E&$T(xAPVruB<$~aNfd0m6eBnHZQ3rBBCZ~ zUSdsTWDPNbWL|V<5B#}1g>I^wHX|Mz3pzG}CIy5LeE|s^21w>c4_)|_0a+^rUYIbG z(8kSq2Iw6D!37(h%4W|f{?o0I+p4>E?d;NA*f(b{KS;KsMs$ma5Q?ZKJ2Zdh>`TR1 z@_H#o9D*F!3%{o`G$Y$o0}oqU!No-B2(z3nly)<9%%4>y(cEz>76DTF=5TT$tMUI9K9 z%JL@`-2GlrCVyT~K~whO_R)LE33?CieV%y+=Hre$`ElHlAk*V^vOA~~h~9CUJq@K< z+NV`xWDUDF3Am`zy>8ljYp0HkRcnP;G6IO~e(*{n+sj`jvxstqkKF?OK3QQLI|OO) z4hJLEle%n@kr&~@(R>LhNP)wj4B%j`VI<{D1oK={&B=JZ(409faZ$B%Ld?7?)g_D0 z4O9jC&s;K8qrSK*EoIrW{e|6mUS7G~1x4NYUUHx84qhzGsM^FR)0Hj$Q+S5S804AI=-`yzvIf9Tr_cd3Iw(oK 
zi9A>2ycm_tAcI7Qi`lN!mY`jUB6tD45WKkC-)^AwZw%;6G#T#;d6g8jIA$KNl4H=S znJa`foyU!_AmQ)XS~m zP$cg;p!+`b*D;)E1m5!ry-?;lxz>f+74Lc4=?Y$4pWvM?jIYp6SA6kl)UM)IV`#){dq z1Z&urVa2RNf>m)Q7U^W+EADF6$*L4sMkh00k()pze8u%cI2VIlmji28AlJedt?7cb z*ZLc;>FOdF{bo&vk|A>$#0LetOJm9s#=Rp}HNJi=uY1uNCj7_08>E`PP z4PObpMI>kxS`58OWR%y>vt&%?aZTvwaSj5>$Ysg6Ex;QL|Dz{ZGOiXnw5lyd>xocRqHjnFVYHce_gPg2<~6d;*@O53VcFR$GpXQ6|L{^{o_6=3;y(=s)23_ zs3Zmw8a!eF3%(ifBw<=d3b#-Qi+_Lzp0_&&IUD- zGS6kUjGFdA%hE7XQ~rM%R*1F1?KFB!b*~54A@61shi__~HYMOFNo1*?a8Qp;&fIUm zO3Ci_P~EoV55c@jCFu?gr|v!m$_fD^f;`zl8M!cqGuzAUwvSU1ySAPtJ;j>_49o%8 zY2(3%R4(gpAR7j(^A4+`dXX+dv1-2+7pKU6*(@VJT%%X3}#X*w3 zo>1X8{VI*WP94I(QMmRA+81`{N3acHDZHP`}s@Ayu1H2{w<_- z6tdp12MyjPs*~0s5r=E*Y$q?T56-F-jT~j*D5x1gy=Vx^wf9jpUL)-l_KLT?3HF=s zcx_(WJNBF21m_;^(Cf&vT9qtSHOZINarKvEE033d;~Q=(Tl;NEg2i-Omkb<;G(csJ zQzngNpZxf*-X43+;nSzzL=ZZXOp`o%x7j~PevmwB|HQjN4OfFvr zQ|bfDKMTV;`=Lr8-f0z=bhMJUc}Eu{M?4;aZ`ANZ)4iNhJB(@R^U*sn^)!VJ+Zof{dL< zdA~h{&imXGpu-6Y`0vyMv3ntlhYlkjqz*7$6UMPVS_wK>#7sAt&ktQb;NYW8rW-}J zKjSs{bc*p5A8{DG6m*mQ0pp2{+Ns`UJjJH0!Sm>j;WZaGk-o*-sSd&ii7a+9c^@_W ziFHIfg=)Xu?TGf^6J=0TfI(y)?in{(g*U>2=#%^byo7bSRi!%xsHGSV{Ag;v2WE# z@HHic29MtbA<1RNx^pj%_}>5Gu^rkL+{h zegZUj!7$a2Ao0+_V=+;U-A!YREE1rSd7sc-!doe4kuZJO>?J6R1n6YmC!D!hHDL`= z#ow-!vBIvvnOHc>LP8&eWWc(bUP*N3OsvV<5zdSN8Mu$~ce4#7C(1ol&(M$hCKUgoR1^QjsxTIexM4tzbWf`lVPDaUo z#eM{%x7T`F?AJo9O~!sL#oF|o`wvhp1r;o@lG9`#5KVez!U!jMgas*~-<1*W%bLfG za4BXi`ML@gX|j1|rJD}vJjr-dnX`oZ5!2}+xT=I^MJ1&35aTWL=^D>Uw*>r=M|BVz z9cWO0)*Ak<0)J049`+zC#n&-jFFFq%hwcLRFnOHUgU-I$%Yp>RnWAM4Fj71SBYd-4 zjL0-tSGafiRqBX$6e|5;j43KKYDBC}TdYUgo_wBin0`OF*-G?w z(*Hb*>5n5?qM7MW!Kfqra;8VRFMTC{e$vO^&7Ysb=Ud$K(Js>YGkvXh1e`OUh?t2G zpLo6>{Gom?o}pR&=g0XOacOvlz3seB&lDpd-*Y5U#D?vQXKZrqEBJfZJpT6-BNd?- zDS38p=jYFYXaDz~@-xQ+A$BH0@j(9XQJf9WpYSvGD;(1jp|}@(pND>3z~7(b=W;xc z=V#n+U|gTViheG!y^Qk((H4As!1IPi=W(J^7!AybaP)E}RV&40DA?|-nW>tPm&HavO!rNxOAWIO&h`9@7e_zCa>G zqRu7}F>a+#(5eng2uG>XxT?AQeQ$}$kPz5=v*9cDhTaX%W>_;(OLpK8Td5f{&a0g; zp>FJjIqUl6oVO9dp|4cF@+ZWGk`*Y9=2Uiq>xggxvGdCO@(^_BQ6AKXu-)N-8i#sR`bklN4wSNTPx>Js$Nhve9n})8y1ZjG4GB>B<>$QtE%x^wM#ZNRt%kY z@5-4AhL3Knsh&Ho(7TLl0cu1kw&~At+aekJ`@wSVOL-qrKcZhA%7{?lYyPWx672z#7RY~)LN8S9bl{!Eq}0S<*mE(`XhG3>2F(T3PtS9p8S zEA%lV5-n!8^X$`w*h`!9z1~#oF*c+L^x!k7SUY~ioZMiyfQoLOhk%TyGV4dyqO5zM zjj_ta;8*lD@Zx7E#wslKYlr=9?yr*vUx1pRCw^{VP>J>0;rqm@so>I#%46P@8F&31 z$R$Pf0(HI$)eF2DG2h{v{tjqs8etoTMH= 3600: + hours += 1 + time_stamp_in_seconds -= 3600 + # get minutes + mins = 0 + while time_stamp_in_seconds >= 60: + mins += 1 + time_stamp_in_seconds -= 60 + time_hours = f"{int(hours):02d}" + time_mins = f"{int(mins):02d}" + time_secs = f"{time_stamp_in_seconds:05.02f}" + fi_time_stamp = time_hours + ":" + time_mins + ":" + time_secs + + return fi_time_stamp + + +def get_timestamp_for_uniform_frame_extraction(num_frames, frame_id, duration): + """ + function: get the timestamp of a frame, 在均匀抽帧时用。 + + num_frames: 总帧数 + frameid_list: 被抽帧的帧的索引 + duration: 视频的总时长 + return: timestamp; xx:xx:xx (str) + """ + time_stamp = duration * 1.0 * frame_id / num_frames + + return time_stamp + + +def render_frame_timestamp(frame, timestamp, font_rate=0.1): + """ + 函数功能, 给frame, 按照顺序将 index 渲染上去 + 逻辑思路: 把index渲染到图片的左上方 + + frame: 帧,PIL.Image object + timestamp: 时间戳,单位是秒 + font_rate: 字体大小占 min(wi, hei)的比率 + """ + + time_stamp = "time: " + timestamp_converting(timestamp) + new_frame = render_single_image_with_timestamp(frame, time_stamp, font_rate) + + return new_frame diff --git a/fastdeploy/input2/mm_processor/utils/video_utils.py b/fastdeploy/input2/mm_processor/utils/video_utils.py new file mode 100644 
index 0000000000..a4769ca8ec --- /dev/null +++ b/fastdeploy/input2/mm_processor/utils/video_utils.py @@ -0,0 +1,83 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import io +import os +from tempfile import NamedTemporaryFile as ntf + +import decord + +try: + # moviepy 1.0 + import moviepy.editor as mp +except: + # moviepy 2.0 + import moviepy as mp + + +def is_gif(data: bytes) -> bool: + """ + check if a bytes is a gif based on the magic head + """ + return data[:6] in (b"GIF87a", b"GIF89a") + + +class VideoReaderWrapper(decord.VideoReader): + """ + Solving memory leak bug + + https://github.com/dmlc/decord/issues/208 + """ + + def __init__(self, video_path, *args, **kwargs): + with ntf(delete=True, suffix=".gif") as gif_file: + gif_input = None + self.original_file = None + if isinstance(video_path, str): + self.original_file = video_path + if video_path.lower().endswith(".gif"): + gif_input = video_path + elif isinstance(video_path, bytes): + if is_gif(video_path): + gif_file.write(video_path) + gif_input = gif_file.name + elif isinstance(video_path, io.BytesIO): + video_path.seek(0) + tmp_bytes = video_path.read() + video_path.seek(0) + if is_gif(tmp_bytes): + gif_file.write(tmp_bytes) + gif_input = gif_file.name + + if gif_input is not None: + clip = mp.VideoFileClip(gif_input) + mp4_file = ntf(delete=False, suffix=".mp4") + clip.write_videofile(mp4_file.name, verbose=False, logger=None) + clip.close() + video_path = mp4_file.name + self.original_file = video_path + + super().__init__(video_path, *args, **kwargs) + self.seek(0) + + def __getitem__(self, key): + frames = super().__getitem__(key) + self.seek(0) + return frames + + def __del__(self): + if self.original_file and os.path.exists(self.original_file): + os.remove(self.original_file) diff --git a/fastdeploy/input2/preprocess.py b/fastdeploy/input2/preprocess.py new file mode 100644 index 0000000000..120be9ce88 --- /dev/null +++ b/fastdeploy/input2/preprocess.py @@ -0,0 +1,101 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +from typing import Any, Dict, Optional + +from fastdeploy.config import ErnieArchitectures +from fastdeploy.engine.config import ModelConfig +from fastdeploy.reasoning import ReasoningParserManager + + +class InputPreprocessor: + """ + Args: + model_name_or_path (str): + Model name or path to the pretrained model. 
If a model name is provided, it should be a + key in the Hugging Face Transformers' model registry (https://huggingface.co/models). + The model will be downloaded from the Hugging Face model hub if necessary. + If a path is provided, the model will be loaded from that path. + reasoning_parser (str, optional): + Reasoning parser type. Defaults to None. + Flag specifies the reasoning parser to use for extracting reasoning content from the model output + enable_mm (bool, optional): + Whether to use the multi-modal model processor. Defaults to False. + + Raises: + ValueError: + If the model name is not found in the Hugging Face Transformers' model registry and the path does not + exist. + """ + + def __init__( + self, + model_name_or_path: str, + reasoning_parser: str = None, + limit_mm_per_prompt: Optional[Dict[str, Any]] = None, + mm_processor_kwargs: Optional[Dict[str, Any]] = None, + enable_mm: bool = False, + ) -> None: + + self.model_name_or_path = model_name_or_path + self.reasoning_parser = reasoning_parser + self.enable_mm = enable_mm + self.limit_mm_per_prompt = limit_mm_per_prompt + self.mm_processor_kwargs = mm_processor_kwargs + + def create_processor(self): + """ + 创建数据处理器。如果启用了多模态注册表,则使用该表中的模型;否则,使用传递给构造函数的模型名称或路径。 + 返回值:DataProcessor(如果不启用多模态注册表)或MultiModalRegistry.Processor(如果启用多模态注册表)。 + + Args: + 无参数。 + + Returns: + DataProcessor or MultiModalRegistry.Processor (Union[DataProcessor, MultiModalRegistry.Processor]): 数据处理器。 + """ + reasoning_parser_obj = None + if self.reasoning_parser: + reasoning_parser_obj = ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) + architectures = ModelConfig({"model": self.model_name_or_path}).architectures[0] + if not self.enable_mm: + if not ErnieArchitectures.contains_ernie_arch(architectures): + from fastdeploy.input.text_processor import DataProcessor + + self.processor = DataProcessor( + model_name_or_path=self.model_name_or_path, + reasoning_parser_obj=reasoning_parser_obj, + ) + else: + from fastdeploy.input.ernie_processor import ErnieProcessor + + self.processor = ErnieProcessor( + model_name_or_path=self.model_name_or_path, + reasoning_parser_obj=reasoning_parser_obj, + ) + else: + if not ErnieArchitectures.contains_ernie_arch(architectures): + raise ValueError(f"Model {self.model_name_or_path} is not a valid Ernie4_5_VL model.") + else: + from fastdeploy.input.ernie_vl_processor import ErnieMoEVLProcessor + + self.processor = ErnieMoEVLProcessor( + model_name_or_path=self.model_name_or_path, + limit_mm_per_prompt=self.limit_mm_per_prompt, + mm_processor_kwargs=self.mm_processor_kwargs, + reasoning_parser_obj=reasoning_parser_obj, + ) + return self.processor diff --git a/fastdeploy/input2/text_processor.py b/fastdeploy/input2/text_processor.py new file mode 100644 index 0000000000..cbaca990c5 --- /dev/null +++ b/fastdeploy/input2/text_processor.py @@ -0,0 +1,602 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + +from abc import ABC, abstractmethod + +import numpy as np +from paddleformers.generation import GenerationConfig +from paddleformers.transformers import Llama3Tokenizer, LlamaTokenizer + +from fastdeploy import envs +from fastdeploy.utils import data_processor_logger + +_SAMPLING_EPS = 1e-5 + + +class BaseDataProcessor(ABC): + """base class for data processor""" + + def __init__(self): + """ + Returns: + None + """ + self.tokenizer = self._load_tokenizer() + self.tokenizer.bos_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.bos_token) + self.tokenizer.cls_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.cls_token) + self.tokenizer.sep_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.sep_token) + self.tokenizer.eos_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.eos_token) + self.tokenizer.mask_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.mask_token) + data_processor_logger.info( + ( + f"tokenizer information: bos_token is {self.tokenizer.bos_token}, {self.tokenizer.bos_token_id}, ", + f"cls_token is {self.tokenizer.cls_token}, {self.tokenizer.cls_token_id}, " + f"sep_token is {self.tokenizer.sep_token}, {self.tokenizer.sep_token_id}, " + f"eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id}, " + f"mask_token is {self.tokenizer.mask_token}, {self.tokenizer.mask_token_id}", + ) + ) + + def _apply_default_parameters(self, request): + """ + Apply default value for parameters in request + """ + + def set_value(req, key, value): + value = getattr(self.generation_config, key, value) + if isinstance(req, dict): + if key not in req: + req[key] = value + else: + if req.get(key) is None: + req.set(key, value) + + set_value(request, "top_p", 0.7) + set_value(request, "temperature", 1.0) + set_value(request, "repetition_penalty", 1.0) + set_value(request, "frequency_penalty", 0.0) + set_value(request, "presence_penalty", 0.0) + return request + + @abstractmethod + def process_request(self, request, **kwargs): + """ + Preprocess the request + + Args: + request (Dict): may contain text and messages fields + **kwargs: others + + Returns: + bool: Whether preprocessing is successful + str: error message + """ + raise NotImplementedError + + @abstractmethod + def process_response(self, response_dict): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + raise NotImplementedError + + def text2ids(self, text, max_model_len=None): + """ + text to token ids + + Args: + text (str): text + + Returns: + List[int]: token ids list + """ + raise NotImplementedError + + def messages2ids(self, messages): + """ + Convert multi-turn messages into ID sequences. + + Args: + messages (List[List[Dict[str, Any]]]): multi-turn messages. + + Returns: + List[int]: ID sequences + """ + raise NotImplementedError + + def ids2tokens(self, token_id, task_id=None): + """ + token ids to strings + + Args: + token_id (List[int]): token id + task_id (str): task id + + Returns: + List[str]: strings + """ + raise NotImplementedError + + @abstractmethod + def _load_tokenizer(self): + """ + load tokenizer + + Returns: + tokenizer (AutoTokenizer) + """ + raise NotImplementedError + + +class DataProcessor(BaseDataProcessor): + def __init__(self, model_name_or_path, reasoning_parser_obj=None): + """ + Initializes the DecodeStatus object. + + Args: + model_name_or_path (str): The name or path of the pre-trained model to be loaded. 
+ Can also be a path to a directory containing the pre-trained model file. + + Returns: + None. + + Raises: + None. + """ + + self.model_name_or_path = model_name_or_path + + # Generation config + try: + self.generation_config = GenerationConfig.from_pretrained(self.model_name_or_path) + except Exception as e: + data_processor_logger.warning( + f"Can't find generation config: {e}, so it will not use generation_config field in the model config" + ) + self.generation_config = None + + self.decode_status = dict() + self.tokenizer = self._load_tokenizer() + data_processor_logger.info( + f"tokenizer information: bos_token is {self.tokenizer.bos_token}, {self.tokenizer.bos_token_id}, \ + eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id} " + ) + + from paddleformers.trl.llm_utils import get_eos_token_id + + self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) + self.eos_token_id_len = len(self.eos_token_ids) + self.pad_token_id = self.get_pad_id() + self.reasoning_parser = None + if reasoning_parser_obj: + self.reasoning_parser = reasoning_parser_obj(self.tokenizer) + self.tokenizer.pad_token_id = self.pad_token_id + + def process_request(self, request, max_model_len=None, **kwargs): + """ + Preprocess the request + + Args: + request (Dict): may contain text and messages fields + + Returns: + bool: Whether preprocessing is successful + str: error message + """ + request = self._apply_default_parameters(request) + if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0: + request.eos_token_ids = self.eos_token_ids + + stop_sequences = request.get("stop", []) + if stop_sequences is not None and len(stop_sequences) != 0: + stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) + request.set("stop_token_ids", stop_seqs) + request.set("stop_seqs_len", stop_seqs_len) + + if request.prompt_token_ids is None or len(request.prompt_token_ids) == 0: + if request.prompt is not None: + request.prompt_token_ids = self.text2ids(request.prompt, max_model_len) + elif request.messages is not None: + if self.tokenizer.chat_template is None: + raise ValueError("This model does not support chat_template.") + task = request.to_dict() + task["enable_thinking"] = kwargs.get("enable_thinking", True) + request.prompt_token_ids = self.messages2ids(task) + else: + raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.") + if len(request.prompt_token_ids) == 0: + raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + if request.get("max_tokens") is None: + request.set( + "max_tokens", + max(1, max_model_len - len(request.prompt_token_ids)), + ) + if request.get("temperature") < _SAMPLING_EPS: + # zero temperature is equivalent to greedy sampling + request.set("temperature", 1) + if request.get("top_p") < _SAMPLING_EPS: + request.set("top_p", _SAMPLING_EPS) + data_processor_logger.info(f"Processed request {request}") + return request + + def process_request_dict(self, request, max_model_len=None, **kwargs): + """ + Preprocess the request + + Args: + request (Dict): may contain text and messages fields + + Returns: + bool: Whether preprocessing is successful + str: error message + """ + request = self._apply_default_parameters(request) + if not request.get("eos_token_ids"): + request["eos_token_ids"] = self.eos_token_ids + + # processing stop_sequences + stop_sequences = request.get("stop", []) + if stop_sequences: + stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) 
+ request["stop_token_ids"] = stop_seqs + request["stop_seqs_len"] = stop_seqs_len + + data_processor_logger.info(f"Processing request {request}") + # processing prompt_token_ids + if not request.get("prompt_token_ids"): + if "prompt" in request: + request["prompt_token_ids"] = self.text2ids(request["prompt"], max_model_len).tolist() + elif "messages" in request: + if self.tokenizer.chat_template is None: + raise ValueError("This model does not support chat_template.") + request["prompt_token_ids"] = self.messages2ids(request) + else: + raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}") + if len(request["prompt_token_ids"]) == 0: + raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + if request.get("max_tokens") is None: + request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) + if request.get("temperature") < _SAMPLING_EPS: + # zero temperature is equivalent to greedy sampling + request["temperature"] = 1 + if request.get("top_p") < _SAMPLING_EPS: + request["top_p"] = _SAMPLING_EPS + data_processor_logger.info(f"Processed request {request}") + return request + + def process_logprob_response(self, token_ids, **kwargs): + full_text = self.tokenizer.decode(token_ids, **kwargs) + return full_text + + def process_response(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + req_id = response_dict.request_id + token_ids = response_dict.outputs.token_ids + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + full_text = self.tokenizer.decode(token_ids) + + # 模型支持思考,并且支持思考 + if self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict.outputs.text = text + response_dict.outputs.reasoning_content = reasoning_content + else: + # 模型不支持思考,并且没单独设置enable_thinking为false + response_dict.outputs.text = full_text + data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}") + + return response_dict + + def process_response_dict_normal(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + enable_thinking = kwargs.get("enable_thinking") + token_ids = response_dict["outputs"]["token_ids"] + is_end = response_dict["finished"] + req_id = response_dict["request_id"] + if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id) + if is_end: + full_text = previous_texts + delta_text + if enable_thinking and self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict["outputs"]["text"] = text + response_dict["outputs"]["reasoning_content"] = reasoning_content + else: + response_dict["outputs"]["text"] = full_text + data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") + del self.decode_status[req_id] + return response_dict + + def process_response_dict_streaming(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: 
response contain text fields + """ + enable_thinking = kwargs.get("enable_thinking") + is_end = response_dict["finished"] + req_id = response_dict["request_id"] + token_ids = response_dict["outputs"]["token_ids"] + + if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): + if token_ids[-1] == self.tokenizer.eos_token_id: + token_ids = token_ids[:-1] + delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id) + + if enable_thinking and self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming( + previous_texts, + previous_texts + delta_text, + delta_text, + previous_token_ids, + previous_token_ids + token_ids, + token_ids, + ) + response_dict["outputs"]["text"] = text + response_dict["outputs"]["reasoning_content"] = reasoning_content + else: + response_dict["outputs"]["text"] = delta_text + if is_end: + data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") + del self.decode_status[req_id] + return response_dict + + def process_response_dict(self, response_dict, **kwargs): + """ + Preprocess the response + + Args: + response_dict (Dict): response for engine, contain ids fields + + Returns: + Dict: response contain text fields + """ + enable_thinking = kwargs.pop("enable_thinking", True) + if enable_thinking is None: + enable_thinking = True + stream = kwargs.get("stream", True) + if stream: + return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) + else: + return self.process_response_dict_normal( + response_dict=response_dict, + enable_thinking=enable_thinking, + **kwargs, + ) + + def text2ids(self, text, max_model_len): + """ + text to token ids + + Args: + text (str): text + + Returns: + List[int]: token ids list + """ + if envs.FD_USE_HF_TOKENIZER: + tokens = self.tokenizer( + text, + return_tensors="np", + padding=True, + truncation=True, + ) + else: + text = [text] if isinstance(text, str) else text + + tokens = self.tokenizer( + text, + return_tensors="np", + padding=True, + truncation=True, + max_length=max_model_len, + add_special_tokens=False, + ) + + return tokens["input_ids"][0] + + def messages2ids(self, request): + """ + Convert multi-turn messages into ID sequences. + + Args: + messages (List[List[Dict[str, Any]]]): multi-turn messages. 
+ + Returns: + List[int]: ID sequences + """ + + spliced_message = self.tokenizer.apply_chat_template( + request, + tokenize=False, + split_special_tokens=False, + add_special_tokens=False, + return_tensors="pd", + ) + req_id = None + tokens = self.tokenizer.tokenize(spliced_message) + if isinstance(request, dict): + req_id = request.get("request_id", None) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") + return token_ids + + def ids2tokens(self, token_id, task_id): + """ + token ids to strings + + Args: + token_ids (List[int]): token ids + task_id (str): task id + + Returns: + List[str]: strings + """ + if envs.FD_USE_HF_TOKENIZER: + if task_id not in self.decode_status: + # history token ids & history token strings & befer decode str + self.decode_status[task_id] = [[], [], ""] + + previous_token_ids = self.decode_status[task_id][0] + decode_str = self.tokenizer.batch_decode( + [previous_token_ids + token_id], + skip_special_tokens=True, + clean_up_tokenization_spaces=False, + ) + if isinstance(decode_str, list) and len(decode_str): + new_str = decode_str[0].replace(self.decode_status[task_id][2], "", 1) + self.decode_status[task_id][1].append(new_str) + self.decode_status[task_id][2] = decode_str[0] + else: + new_str = "" + self.decode_status[task_id][0] += token_id + return new_str + else: + if task_id not in self.decode_status: + # prefix offset & read offset & history token ids & history token strings + self.decode_status[task_id] = [0, 0, [], ""] + + prefix_offset = self.decode_status[task_id][0] + read_offset = self.decode_status[task_id][1] + previous_token_ids = self.decode_status[task_id][2] + previous_texts = self.decode_status[task_id][3] + decode_str, prefix_offset, read_offset = self.tokenizer.decode_token( + previous_token_ids + token_id, prefix_offset, read_offset + ) + self.decode_status[task_id][0] = prefix_offset + self.decode_status[task_id][1] = read_offset + self.decode_status[task_id][2] += token_id + self.decode_status[task_id][3] += decode_str + + return decode_str, previous_token_ids, previous_texts + + def _load_tokenizer(self): + """ + load tokenizer + + Returns: + tokenizer (AutoTokenizer) + """ + if envs.FD_USE_HF_TOKENIZER: + from transformers import AutoTokenizer + + return AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=False) + else: + from paddleformers.transformers import AutoTokenizer + + return AutoTokenizer.from_pretrained(self.model_name_or_path, padding_side="left", use_fast=True) + + def clear_request_status(self, task_id): + """ + clear request status + + Args: + task_id (str): task id + + Returns: + results_all (str): all token strings + """ + results_all = "" + if task_id in self.decode_status: + if envs.FD_USE_HF_TOKENIZER: + results_all = self.decode_status[task_id][2] + else: + results_all = "".join(self.decode_status[task_id][3]) + del self.decode_status[task_id] + return results_all + + def get_pad_id(self): + """ + get pad_token_id, if not pad_token_id, use eos_token + + Returns: + int: pad_token_id + """ + if isinstance(self.tokenizer, (LlamaTokenizer, Llama3Tokenizer)) and not self.tokenizer.pad_token_id: + return self.tokenizer.eos_token + return self.tokenizer.pad_token_id + + def pad_batch_data( + self, + insts, + pad_id=0, + return_seq_len=False, + return_array=True, + pad_style="right", + ): + """Pad the instances to the max sequence length in batch.""" + if len(insts) == 0: + padded_insts = np.array([[]], 
dtype=np.int64) if return_array else [[]] + if return_seq_len: + seq_len = np.array([], dtype=np.int64) if return_array else [] + return padded_insts, seq_len + return padded_insts + + max_len = max(map(len, insts)) + if pad_style == "left": + padded_insts = [[pad_id] * (max_len - len(inst)) + list(inst) for inst in insts] + else: + padded_insts = [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts] + if return_array: + padded_insts = np.array(padded_insts, dtype=np.int64).reshape([-1, max_len]) + + if return_seq_len: + seq_len = [len(inst) for inst in insts] + if return_array: + seq_len = np.array(seq_len, dtype=np.int64).reshape(-1, 1) + return padded_insts, seq_len + return padded_insts + + def update_stop_seq(self, stop_sequences): + """ + Update stop sequences from request. + """ + stop_seqs = [] + for seq in stop_sequences: + if seq != self.tokenizer.eos_token_id: + stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) + stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) + data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") + return stop_seqs, stop_seqs_len From bb1e6db3ae598794a24d985acccb72ed0a1d0a43 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 12:18:52 +0800 Subject: [PATCH 02/16] add qwen25-vl processor --- ...ssor_adaptive.py => image_preprocessor.py} | 194 ++++++------- .../image_preprocessor/__init__.py | 20 -- .../get_image_preprocessor.py | 34 --- fastdeploy/input/mm_processor/process.py | 10 +- .../input/mm_processor/process_video.py | 69 ++++- .../mm_processor/utils/Roboto-Regular.ttf | Bin 146004 -> 0 bytes .../input/mm_processor/utils/__init__.py | 15 - .../input/mm_processor/utils/io_utils.py | 264 ------------------ .../mm_processor/utils/render_timestamp.py | 103 ------- .../input/mm_processor/utils/video_utils.py | 83 ------ 10 files changed, 151 insertions(+), 641 deletions(-) rename fastdeploy/input/mm_processor/{image_preprocessor/image_preprocessor_adaptive.py => image_preprocessor.py} (91%) delete mode 100644 fastdeploy/input/mm_processor/image_preprocessor/__init__.py delete mode 100644 fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py delete mode 100644 fastdeploy/input/mm_processor/utils/Roboto-Regular.ttf delete mode 100644 fastdeploy/input/mm_processor/utils/__init__.py delete mode 100644 fastdeploy/input/mm_processor/utils/io_utils.py delete mode 100644 fastdeploy/input/mm_processor/utils/render_timestamp.py delete mode 100644 fastdeploy/input/mm_processor/utils/video_utils.py diff --git a/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py b/fastdeploy/input/mm_processor/image_preprocessor.py similarity index 91% rename from fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py rename to fastdeploy/input/mm_processor/image_preprocessor.py index c86d8046e3..6508755926 100644 --- a/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py +++ b/fastdeploy/input/mm_processor/image_preprocessor.py @@ -73,6 +73,70 @@ ] +def round_by_factor(number: int, factor: int) -> int: + """Returns the closest integer to 'number' that is divisible by 'factor'.""" + return round(number / factor) * factor + + +def ceil_by_factor(number: int, factor: int) -> int: + """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" + return math.ceil(number / factor) * factor + + 
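A quick standalone sanity check of these rounding helpers (together with floor_by_factor, defined just below), which smart_resize uses to snap image sides onto the patch grid. The factor of 28 is an assumption here, matching the usual patch_size * merge_size = 14 * 2; it is not spelled out in this hunk.

import math

def round_by_factor(number: int, factor: int) -> int:
    # Nearest multiple of `factor` (same body as in the hunk above).
    return round(number / factor) * factor

def ceil_by_factor(number: int, factor: int) -> int:
    # Smallest multiple of `factor` that is >= `number`.
    return math.ceil(number / factor) * factor

def floor_by_factor(number: int, factor: int) -> int:
    # Largest multiple of `factor` that is <= `number`.
    return math.floor(number / factor) * factor

# A 1023-pixel side snapped onto a 28-pixel grid:
assert round_by_factor(1023, 28) == 1036
assert ceil_by_factor(1023, 28) == 1036
assert floor_by_factor(1023, 28) == 1008

smart_resize falls back to the floor variant whenever h_bar * w_bar would exceed max_pixels, so the rescaled size never overshoots the pixel budget.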
+def floor_by_factor(number: int, factor: int) -> int: + """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" + return math.floor(number / factor) * factor + + +def smart_resize( + height: int, + width: int, + factor: int = IMAGE_FACTOR, + min_pixels: int = MIN_PIXELS, + max_pixels: int = MAX_PIXELS, +): + """ + Rescales the image so that the following conditions are met: + + 1. Both dimensions (height and width) are divisible by 'factor'. + + 2. The total number of pixels is within the range ['min_pixels', 'max_pixels']. + + 3. The aspect ratio of the image is maintained as closely as possible. + """ + if max(height, width) / min(height, width) > MAX_RATIO: + if height > width: + new_width = max(factor, round_by_factor(width, factor)) + new_height = floor_by_factor(new_width * MAX_RATIO, factor) + else: + new_height = max(factor, round_by_factor(height, factor)) + new_width = floor_by_factor(new_height * MAX_RATIO, factor) + + data_processor_logger.info( + f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)},\ + resize to {max(new_height, new_width) / min(new_height, new_width)}" + ) + + height = new_height + width = new_width + + h_bar = max(factor, round_by_factor(height, factor)) + w_bar = max(factor, round_by_factor(width, factor)) + if h_bar * w_bar > max_pixels: + beta = math.sqrt((height * width) / max_pixels) + h_bar = floor_by_factor(height / beta, factor) + w_bar = floor_by_factor(width / beta, factor) + elif h_bar * w_bar < min_pixels: + beta = math.sqrt(min_pixels / (height * width)) + h_bar = ceil_by_factor(height * beta, factor) + w_bar = ceil_by_factor(width * beta, factor) + + if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels: + raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}") + + return h_bar, w_bar + + def is_scaled_image(image: np.ndarray) -> bool: """ Checks to see whether the pixel values have already been rescaled to [0, 1]. @@ -125,13 +189,11 @@ def make_batched_videos(videos) -> List[VideoInput]: raise ValueError(f"Could not make batched video from {videos}") -class AdaptiveImageProcessor(BaseImageProcessor): +class ImageProcessor(BaseImageProcessor): r""" Constructs a adaptive image processor that dynamically resizes images based on the original images. Args: - do_resize (`bool`, *optional*, defaults to `True`): - Whether to resize the image's (height, width) dimensions. resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): Resampling filter to use when resizing the image. 
do_rescale (`bool`, *optional*, defaults to `True`): @@ -168,7 +230,6 @@ class AdaptiveImageProcessor(BaseImageProcessor): def __init__( self, - do_resize: bool = True, resample: PILImageResampling = PILImageResampling.BICUBIC, do_rescale: bool = True, rescale_factor: float = 1 / 255, @@ -185,7 +246,6 @@ def __init__( ) -> None: """init""" super().__init__(**kwargs) - self.do_resize = do_resize self.resample = resample self.do_rescale = do_rescale self.rescale_factor = rescale_factor @@ -232,7 +292,6 @@ def get_smarted_resize(self, height, width, min_pixels=None, max_pixels=None): def _preprocess( self, images: Union[ImageInput, VideoInput], - do_resize: bool = True, resample: PILImageResampling = None, do_rescale: bool = True, rescale_factor: float = 1 / 255, @@ -242,7 +301,6 @@ def _preprocess( do_convert_rgb: bool = False, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, - predetermined_grid_thw=None, ): """ Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`. @@ -253,8 +311,6 @@ def _preprocess( If pixel values range from 0 to 1, set `do_rescale=False`. vision_info (`List[Dict]`, *optional*): Optional list of dictionaries containing additional information about vision inputs. - do_resize (`bool`, *optional*, defaults to `self.do_resize`): - Whether to resize the image. resample (`PILImageResampling`, *optional*, defaults to `self.resample`): Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums. do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): @@ -301,28 +357,17 @@ def _preprocess( input_data_format = infer_channel_dimension_format(images[0]) height, width = get_image_size(images[0], channel_dim=input_data_format) - resized_height, resized_width = height, width - processed_images = [] + resized_height, resized_width = smart_resize( + height, + width, + factor=self.patch_size * self.merge_size, + min_pixels=self.min_pixels, + max_pixels=self.max_pixels, + ) - if predetermined_grid_thw is not None: - assert len(predetermined_grid_thw) == len( - images - ), f"len(predetermined_grid_thw) {len(predetermined_grid_thw)} == len(images) {len(images)}" - - for img_idx, image in enumerate(images): - if do_resize: - if predetermined_grid_thw is not None: - (resized_height, resized_width) = predetermined_grid_thw[img_idx] - resized_height *= self.patch_size - resized_width *= self.patch_size - else: - resized_height, resized_width = smart_resize( - height, - width, - factor=self.patch_size * self.merge_size, - min_pixels=self.min_pixels, - max_pixels=self.max_pixels, - ) + processed_images = [] + for image in images: + if height != resized_height or width != resized_width: image = image.astype("uint8") # TODO : 需要手动加上,否则多除255 导致结果会出错 # 直接fromarray,不要靠paddleformers里面的 image = Image.fromarray(image) @@ -332,7 +377,7 @@ def _preprocess( resample=resample, data_format=input_data_format, ) - + if do_rescale and do_normalize: image_mean = np.array(image_mean, dtype=np.float32) * (1.0 / rescale_factor) image_std = np.array(image_std, dtype=np.float32) * (1.0 / rescale_factor) @@ -363,7 +408,7 @@ def _preprocess( if data_format == ChannelDimension.LAST: patches = patches.transpose([0, 3, 1, 2]) - + grid_t, channel = patches.shape[:2] grid_t = grid_t // self.temporal_patch_size @@ -398,9 +443,8 @@ def _preprocess( def preprocess( self, - images: ImageInput, + images: ImageInput = None, videos: VideoInput = None, - 
do_resize: bool = True, size: Optional[Union[int, List[int]]] = None, resample: PILImageResampling = None, do_rescale: bool = True, @@ -412,7 +456,6 @@ def preprocess( return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, - predetermined_grid_thw=None, ): """ Args: @@ -422,8 +465,6 @@ def preprocess( videos (`VideoInput`): Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If passing in videos with pixel values between 0 and 1, set `do_rescale=False`. - do_resize (`bool`, *optional*, defaults to `self.do_resize`): - Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): Size of the image after resizing. Shortest edge of the image is resized to size["shortest_edge"], with the longest edge resized to keep the input aspect ratio. @@ -461,7 +502,6 @@ def preprocess( - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. """ - do_resize = do_resize if do_resize is not None else self.do_resize size = size if size is not None else self.size resample = resample if resample is not None else self.resample do_rescale = do_rescale if do_rescale is not None else self.do_rescale @@ -481,14 +521,9 @@ def preprocess( if images is not None: pixel_values, vision_grid_thws = [], [] - for img_idx, image in enumerate(images): - if predetermined_grid_thw is not None: - predetermined_grid_thw_one = [predetermined_grid_thw[img_idx]] - else: - predetermined_grid_thw_one = None + for image in images: patches, image_grid_thw = self._preprocess( image, - do_resize=do_resize, resample=resample, do_rescale=do_rescale, rescale_factor=rescale_factor, @@ -498,10 +533,10 @@ def preprocess( data_format=data_format, do_convert_rgb=do_convert_rgb, input_data_format=input_data_format, - predetermined_grid_thw=predetermined_grid_thw_one, ) pixel_values.extend(patches) vision_grid_thws.append(image_grid_thw) + pixel_values = np.array(pixel_values) vision_grid_thws = np.array(vision_grid_thws) data = { @@ -514,7 +549,6 @@ def preprocess( for images in videos: patches, video_grid_thw = self._preprocess( images, - do_resize=do_resize, resample=resample, do_rescale=do_rescale, rescale_factor=rescale_factor, @@ -524,10 +558,10 @@ def preprocess( data_format=data_format, do_convert_rgb=do_convert_rgb, input_data_format=input_data_format, - predetermined_grid_thw=predetermined_grid_thw, ) pixel_values.extend(patches) vision_grid_thws.append(video_grid_thw) + pixel_values = np.array(pixel_values) vision_grid_thws = np.array(vision_grid_thws) @@ -536,68 +570,4 @@ def preprocess( "video_grid_thw": vision_grid_thws, } - return BatchFeature(data=data, tensor_type=return_tensors) - - -def round_by_factor(number: int, factor: int) -> int: - """Returns the closest integer to 'number' that is divisible by 'factor'.""" - return round(number / factor) * factor - - -def ceil_by_factor(number: int, factor: int) -> int: - """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" - return math.ceil(number / factor) * factor - - -def floor_by_factor(number: int, factor: int) -> int: - """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" - return math.floor(number / factor) * factor - - -def smart_resize( - height: int, - width: int, - factor: int = IMAGE_FACTOR, - min_pixels: int = MIN_PIXELS, - max_pixels: int = MAX_PIXELS, -): - """ - 
Rescales the image so that the following conditions are met: - - 1. Both dimensions (height and width) are divisible by 'factor'. - - 2. The total number of pixels is within the range ['min_pixels', 'max_pixels']. - - 3. The aspect ratio of the image is maintained as closely as possible. - """ - if max(height, width) / min(height, width) > MAX_RATIO: - if height > width: - new_width = max(factor, round_by_factor(width, factor)) - new_height = floor_by_factor(new_width * MAX_RATIO, factor) - else: - new_height = max(factor, round_by_factor(height, factor)) - new_width = floor_by_factor(new_height * MAX_RATIO, factor) - - data_processor_logger.info( - f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)},\ - resize to {max(new_height, new_width) / min(new_height, new_width)}" - ) - - height = new_height - width = new_width - - h_bar = max(factor, round_by_factor(height, factor)) - w_bar = max(factor, round_by_factor(width, factor)) - if h_bar * w_bar > max_pixels: - beta = math.sqrt((height * width) / max_pixels) - h_bar = floor_by_factor(height / beta, factor) - w_bar = floor_by_factor(width / beta, factor) - elif h_bar * w_bar < min_pixels: - beta = math.sqrt(min_pixels / (height * width)) - h_bar = ceil_by_factor(height * beta, factor) - w_bar = ceil_by_factor(width * beta, factor) - - if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels: - raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}") - - return h_bar, w_bar + return BatchFeature(data=data, tensor_type=return_tensors) \ No newline at end of file diff --git a/fastdeploy/input/mm_processor/image_preprocessor/__init__.py b/fastdeploy/input/mm_processor/image_preprocessor/__init__.py deleted file mode 100644 index c11444e675..0000000000 --- a/fastdeploy/input/mm_processor/image_preprocessor/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from .get_image_preprocessor import get_image_preprocessor -from .image_preprocessor_adaptive import AdaptiveImageProcessor - -__all__ = ["get_image_preprocessor", "AdaptiveImageProcessor"] diff --git a/fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py b/fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py deleted file mode 100644 index 0ff6f7d1ed..0000000000 --- a/fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -"""get image preprocessor""" - -from fastdeploy.utils import data_processor_logger - -from .image_preprocessor_adaptive import AdaptiveImageProcessor - - -def get_image_preprocessor(args): - """ - get_image_preprocessor from args - """ - - if args.vision_model_name_or_path is None: - return None - - data_processor_logger.info("use AdaptiveImageProcessor") - image_preprocess = AdaptiveImageProcessor.from_pretrained(args.vision_model_name_or_path) - return image_preprocess diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index bcdff21313..69e612a449 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -27,7 +27,7 @@ from paddleformers.transformers import AutoTokenizer -from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor +from .image_preprocessor import ImageProcessor from .process_video import read_video_decord IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} @@ -60,7 +60,7 @@ def __init__( self.model_name_or_path = tokenizer_name self._load_tokenizer() self.tokenizer.ignored_index = -100 - self.image_preprocessor = AdaptiveImageProcessor.from_pretrained(image_preprocessor_name) + self.image_preprocessor = ImageProcessor.from_pretrained(image_preprocessor_name) # Convolution sizes for patch aggregation self.spatial_conv_size = spatial_conv_size @@ -266,7 +266,7 @@ def _add_image(self, img, outputs: Dict) -> None: # image_mean=image_mean, # image_std=image_std, # do_rescale=do_rescale, - predetermined_grid_thw=np.array([[patches_h, patches_w]]), + # predetermined_grid_thw=np.array([[patches_h, patches_w]]), do_convert_rgb=True, input_data_format=ChannelDimension.LAST, ) @@ -299,7 +299,7 @@ def _add_video(self, frames, outputs: Dict) -> None: # image_mean=image_mean, # image_std=image_std, # do_rescale=do_rescale, - predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), + # predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), do_convert_rgb=True, input_data_format=ChannelDimension.LAST, ) @@ -319,7 +319,7 @@ def _add_video(self, frames, outputs: Dict) -> None: outputs["cur_position"] = np.max(pos_ids) + 1 def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: - reader, meta = read_video_decord(url, save_to_disk=False) + reader, meta = read_video_decord(url) frames = [] for i in range(meta["num_of_frame"]): diff --git a/fastdeploy/input/mm_processor/process_video.py b/fastdeploy/input/mm_processor/process_video.py index e82456bfcf..1fdfea1562 100644 --- a/fastdeploy/input/mm_processor/process_video.py +++ b/fastdeploy/input/mm_processor/process_video.py @@ -16,15 +16,74 @@ import io import os -import random +import decord +# from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename +# from .utils.video_utils import VideoReaderWrapper +from tempfile import NamedTemporaryFile as ntf -import numpy as np -from PIL import Image +try: + # moviepy 1.0 + import moviepy.editor as mp +except: + # moviepy 2.0 + import moviepy as mp from fastdeploy.utils import data_processor_logger 
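The try/except import block added above is a version shim: moviepy 1.x exposes its public API through moviepy.editor, while moviepy 2.x re-exports it at the top level. A minimal standalone sketch of the same shim, narrowed to ImportError so that unrelated failures inside moviepy are not silently swallowed (illustrative only, not part of this patch):

try:
    import moviepy.editor as mp  # moviepy 1.x layout
except ImportError:
    import moviepy as mp  # moviepy 2.x: VideoFileClip lives at the top level

# Either branch leaves the same alias usable by the caller.
assert hasattr(mp, "VideoFileClip")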
-from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename -from .utils.video_utils import VideoReaderWrapper + +def is_gif(data: bytes) -> bool: + """ + check if a bytes is a gif based on the magic head + """ + return data[:6] in (b"GIF87a", b"GIF89a") + + +class VideoReaderWrapper(decord.VideoReader): + """ + Solving memory leak bug + + https://github.com/dmlc/decord/issues/208 + """ + + def __init__(self, video_path, *args, **kwargs): + with ntf(delete=True, suffix=".gif") as gif_file: + gif_input = None + self.original_file = None + if isinstance(video_path, str): + self.original_file = video_path + if video_path.lower().endswith(".gif"): + gif_input = video_path + elif isinstance(video_path, bytes): + if is_gif(video_path): + gif_file.write(video_path) + gif_input = gif_file.name + elif isinstance(video_path, io.BytesIO): + video_path.seek(0) + tmp_bytes = video_path.read() + video_path.seek(0) + if is_gif(tmp_bytes): + gif_file.write(tmp_bytes) + gif_input = gif_file.name + + if gif_input is not None: + clip = mp.VideoFileClip(gif_input) + mp4_file = ntf(delete=False, suffix=".mp4") + clip.write_videofile(mp4_file.name, verbose=False, logger=None) + clip.close() + video_path = mp4_file.name + self.original_file = video_path + + super().__init__(video_path, *args, **kwargs) + self.seek(0) + + def __getitem__(self, key): + frames = super().__getitem__(key) + self.seek(0) + return frames + + def __del__(self): + if self.original_file and os.path.exists(self.original_file): + os.remove(self.original_file) def read_video_decord(video_path): diff --git a/fastdeploy/input/mm_processor/utils/Roboto-Regular.ttf b/fastdeploy/input/mm_processor/utils/Roboto-Regular.ttf deleted file mode 100644 index 7e3bb2f8ce7ae5b69e9f32c1481a06f16ebcfe71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 146004 zcmb@v2VfL8(?2{ad+uON$H1LAY>MfWC3h031oSW{sOP-O*@UYa+3X zh(|PQ->#F#SC?)Obx$XXINhvM=Z4Rg-K;X~U-`jO6UuosEl$l6d1-Zm@3U3hJ z!)#QOh+V*akGR3H!_EZ9eof?goX9dgZp+e`D`8HtMx;G_fhYUa6b@s6(D@gNDY%Zd=Yu5EUFoLlX&~^;&DO`-s;9~S1orU{8E=C{YV)Q8~+5dvRg!?rv zMwh@ZM%QpLx`~U?ZTbc7U3v!h1ujM=NU1^Mad?6@pus6^3hL#o!i)ObJ$+ zg#xe4s=%$rs==+z>cDNl8pCbIn!|0y+Q99=I=~G_|75H?12m%v@l!2v-6Toq>oxGLTgpThlI zdxi1Bb$B(?I;_U0)}b{_>yYX_tb?n6Y8_PFv<|FZ z);gei0c-y%rnO%s)7rPvr`A4|O>6IprZv8TX^pFpWbIXGogS~~^zuyzbKtsR1s ztnEWgYrBxi*0#k>Ya4s0wY5FT+RAQPTiUx=Tj0KVG1J=2W?GxtMp&B!nbyWZBdm?A zrnO0;Y(^|iPFo<0~F@GXQ5e_O2E{=iUNW^xX!0uL*0QoQT9~MXLRtZX`CrT zLkmEvokNWpY1KH#v@b0f>s8kv50wCYz>nU6Zcm2JJ4NT|bGilIy<|CAE*8v6unMdO z3uB#FPu7>c!zQtXYzf=U9P9ym#!Mc_tMf>HoWIXMdnR9+<_p*&G{`Lq1p z;6|>ZRJ2~v$wfC6eN^-bI6yXUo4+l$Ex*lX3kC-&*=o8tFv_;ow!@YL4ji!^e}x0x zTpY-ijRW!E00RdYIPe@CfFPK^n>W~F^Idk^JOkOd1h@$J67V_TQ^3c7bAb2Fe5QwK zy!`X!UG9bZmM~L$ziiCvz4-3Mw=X_?ap1+S7du~Ud9nUQ0$5v+v=qMZMj3xSfBF0w z(etB#RKQNa=I5K>t_Cay%tfk3fH2@ah;F{PdE(}fo2zf~n=5awx;En4$ZJDk(+#>d z@LK!i>@kxGZeemX1;h=Y_S4#B29Dh`V@Xr!a! 
zm^dy@h?8_yoQC~z4wlGy^!yL#L;46d-6!H*Wz`8&d_Z4`59w?4-;c#7uu(41Mfyg3 zE=hbr-@>B*4w~(2`d(as&G!TSC@zZ6=(6}md@H^a-@}smLHr0?@QS!Bexf_D|1#)T zafR+d$375O=^;I$$Mi&8qu<2O;yV2 z%tP6Wa-9@Xviz(7E655-Lt3P#^pI;=8CI5+W93-|Wg|g%RZ^BCY$tD471m>QRs&jh zlk}0ka*13j{bUZgS#FVQSZmfsu9B-^U-`>ia+zGty0ES+LKc>RvWP4ytxa^VJrSn9ZN3&JrE(s_DNm~qvGdy=nl!G(KKh_Tm& z`(UoL#)bQ4!~Iw>InEWHgR04fF1%nCZBP)tvYIm-_PVz$%x7I&ruhihYE`gE<7)l z6k}X?J}N3ATzGyeCfqa^Ae+eN3NJ{-`6CxzhzfIeS%DP7UHr$~qbMwNCp{q)0KMeI zOOP-0loKyW-h6-yFGcxyxC<{$#V`+WrU|72ypRholht0VEEU2$!P<%oYx-5ON%rGBXA zztt^^8~^y!>a*{ZlQnsQ9N8*=sczTpmJgAs-{q zHnHIP-zDzvA+~H%_@>w_8r*e$Q>yq3hPfJsTNi} z)ogSaVuw&4txHC^o^@-Ka=PvNk~9C2WTU38bT;Y)tPk=UtNUVa#Pvt6w%2lqN62X8 zue8(%;EEoVwiC6W*6`c8%Fmv&s#lz*Y9!n;NYzC@Jrdzcrs-?0&VZ%D?2s||aG{u1f$xP zYq{44Ubnmpde`xu?S0HAk58men$I`B(l^w1u@A&)sxA1?@Va-9yy6VNR)=e(A@F?o~n`R41CZ*V>{f0_K-@;@zLDPS*fyuhagsbFxy=z>QJepV<) zp|XXV6&hA(R-to+zAW@}p~rzTFg$Q`;Hkm|3U?^Hw@B+Edy3pBTBGRvqQ6)RS<6{t ztaGfptT${GZR>)9gIWcx4oWZ9qS&TlSL~(i?d+@VY4&gIKNY9qfyG-DA5%QF_!q^W z2m1#X4=x)V5j;A0VeppV%fUB;pNE8oObba2`7-2QiDD&2mq;y{tK`IzmrMDS8eQs# z(q&5bDSf8&?a*4GiJ{laR4g;G%x7g@maSHHR@wKo|RTsx==Z|a@)$&EAOs+sY?DTU8>Bj@T5Q>XWOdR=-oDV2uVf`qr3MGk?wcHAmK5RrAAI9<`d*npo?@+9hiDt^IBt|2hNf z9H{f8Zo#@k>YlA9>P6Q(RPSN^()Hh}|8ZEkumufdg9#1(XgH$bca2InifOc}(Tm2> zO{mG%ranzaH9g$)&t@f?HElM$+016^npbH)qxt6MCz}7&qE3s=E#7N!v!$(Nx0XX% zPH8#6<))TDwF+$2rPZQVTU#A!b-Q(e)?Hg~Y~$HxXq%~RmbZDo&C|AiZEbDi+wN%l zW4k)-*0j6czI6M#?c2AX)&6w*uiM{h|5pd!4&6HJ=z zxL^4A@Ee`OI&bWJw@a-qGrN4zHCNY;U5|D>-}S4mPyhLf;1S*tfe~dQnnuJ#OpI6_ zu`l9S#QPDCBLgDqL`Fo$M~;eI5Y;fMPt?q)^-(*b_C=*dor(G=>g%Ws1?H1Cl zeYc6-9Nms|d$-%i-7a*y-0fDk``!MGX3^f!xuXk52S>M#9vOWu`g!;8?lIldy1(qv zzQ>v#cY2oU*}Lban4&TLVm^%Ju`Od`Vu#1hie2A}_bS}0U9Xs4bK?reb&5-kyAq!( zzES+x`04SR;*Z9kkAKm-X7A;_|LoJL&(l6n`#$Jbt>3_Y3;TW2zi9u3{df2OYe3xr zy$8HMkPR$2u;;+J1CI~9J*d>6HiO0wT0ZFE;EIE14o(~Va!Ao3ZHM$9vUGJMg4 z`fGIe(X&S%9sOubnK9kREF5!U%+s-z#?~7#QGCwPyFhw3UAGR>-)FsyglyiPu_m=PQ`cLdT04N&n7jU)OFILNv9_F zn4C5x$CL(BMo!r}<(sLwrgoZ|I`!wNFQ%287C&vyG{>}i(<@Jpo<3{(h3S{4-43-fEw zpFMxk{O1co7K~i5W5Kh9r5A#|3x_W}x=1W)w5Z>rZHo>s`e@PRMGqFs#RV3ZTijrA z_~O2cCoEpDc=O@|i{D@T!{U2O*pfU;N-U|hr0tT}C8L(iTC#S@-X&+2Tv&2z$)8Jo zm)e$ATiRl2^wObArz~B*bm!6|OFvn9b?KvJ#yZq&fTq}yNsJ5cTis%(XS4>&4Y{kwMM^}8Z;>wCgD~*)}Rt{eI z!^(TBXjSf2!K-SlYPqWWs$r|9uUfe(an*@cU#z;m>i5-Ns|&9#zq-NdPOJN@9#psvHh%5cwR6{QT${S~?AmYF-dX!%UCwpI)>U2CVqN!j!`4k*w_;u5x)bZZSa*Hh zAM3r>7hYd}eS`Jk>-(;sxPIaKt?LhO@Yv96!{`mOH>};TXTzBd7dG79@N}c!M%%_p z8=Gv5*f?FH*_&9==|HaFWGwRzCy$(xsM-o80)i`SOITgq)|uqAv;pDp9IBy3r~<^EQ- zHTTw#tu?o{*&4HTs-rII@+qG?vw|i_axIK7#_3bUTM{ggtefsvVx8K_S=MJA8MR!!((P&4P9RqfZ z*^#hg!;X|4=XPA$@$-&5J09(Lx|8qp-Wjm7(9U8zOYf|-v)0arJ6rDTv2(=E89P_) zOxk&3=a)Ob-+684FFPOZ{A-urF59lkyPEEb+%box1zn?oW4LOe~OSODvgKC$U{(*TkNQ zBNHbkE=b&zxIgiH;`fOeNtBc)sYFt(q=rc?le#4JPa2*yJ!xf9a?+Wk3rV+=o+kSx z=S?n>Trs&xa);#Z$-|PTC9g={ot%<7D8h2-1G_xJEUdH0msQ+H33J#F^H>>0Ud z)}DoXw(dE+=i@zB_B`5a?9IP7bZ`B=o%Z(KJ9O{Zy>s_&+`E78`Muxoy}Q@k7qBm6 zU#)#@_r>lTy>HIG#rwAJJF@SSeOLB9c32#R9AzC19N~_Bj){&{jsuPl96vf9q{x&4 zDP>Z^Qo>V)rz}X>l5!~J!;~LW?x(WUys0Hq>!h|zjY}Pqnvl9a^=RrhslTK?-|xS_ z`2HIETkr3=f5iTo`&aMZv;Xw|3;S>He|8|pfzSiZ4n!Rod|>i{r3ZE#IC9{V13w*j zbkK6J(7|#C8ypNj*!SRsgYyq=I=KJf`GemdynE0*l>1PLL$wdJJrsLr)S;P&wjMfk z=)*%l9=d;+AI^KY9HKg ziXE$VtmUyD$3`5Rd2G$GJ;zQTyKpT1*z@E5$BQ4YalG~MnB$|4&pN*5_}=619{=X} zo#QW01e^#tQS(Hr6FpCiJTdFU+7tUuym#W;6W34NKk@9O$I09$ttUfIRz6w#WTTU< zPIfxk?PT1^fhR|voOp8D$+;(&o?Lr!-^q7RUObt8^1;bJPx+q8bE?Ftnx`6^YH_N= zsmN1KHcW@>eHJ~?>T+?^jD`ZoxXMYuQR@93Y-Z& zQ}0a2Gd<1>KQsNz$}@>)4xM@T%r{{zSFc*RV%E$V)2B_HGI`QFZ@)Eh!uWAx$BZ5| 
za>VdqLx&6=G;l!wetrA&j*sgV8`HB#_vmg>kr7?HbPn&-p?$lyZCbZ#*`j%~rX{WZ z-d<%`ijQ|Ad!ydoWy?^Cw+~RCvSpaV(#YYV10C%`ZFq$k5!9+=? zLk2fd5F4Km=Z=U(7I+HJB8yg??5#RPMcA4o#OS9G68_4aGroq(QOCJ32X7P};bH~&(0SYZwdfY^XuHip?GqB>Q%D3Oo3OwXrV*pj+(^_U((dRLY7eqU^hQ=G zSY8bZk7)$V*Nrh-Gn8OU^QT^bxUTjz=E9;P9JZJ~kkXSZrWIkUhv28JT9DDXdc4gOC@ehADP7yF*GCo82iY z0);wkv%@1&Ipd9D@M5+&;v&*)&O}GT@SZ3xjXT5qb-I)gRS;r0Vx%)BOvfNhBFxhnHrbiFq${;2V*FLi5p1|1 zunsgk|G*_QWSF-R<{9P{=8GK|XbTmTijboW+m{&a_ho)8Fa>#{1_(}LlT*CH0zoZu zat0m6>n1Oz!X{^ifpV2BTSh3-S@F(UHRv1_vEP@Fzjl$Lq53OZrb!BK8*0xI>kbiU zz$PiIZD6MoE_~!rnX39+{FSPr|S!$i|)$ zYYB-7O^9$PAycAd_pb#(6~Qgy>`h}-3Un0ww?I%_7h8;@S4=2Sn}5>;MBCzGRcgwY z!bNZjlfkSW)dQz2z7B7D?}iQ^d&A6_`c&T;V^J|4_J$6Y&&lT|_9nIh{S)Hsy&!X8 z?IZdI_KA!|-i|PPtV7xx2Bt`Ai2hW7p%P6}sBI`}(+X18F0_3&bW2rzTS7u(TS}M= ziH(a@_l<+l(-T}#_Qs7>H+4VJ#FpR)i;asxiYAddaoI8;A)$#q))o(Sf$D-+o$PA! zL6j;bJSrl=H{Kp^2T#Mo5@J#9KwDg7U_xY^<}t_tX;ik1k)@AZ>WHge9~{>Q4mv`w z7<(^gfa;I0hxL6upbt`I53{#a#evwCifZ@?E$vO>kwQUiyhA|lf^6~F5ut>H+G|bm zkBN{)W+EHfRA-*xUt39vx;&p_g*v5+IF^?&xhs zaIS=a1Y6An=&9~du`(pCOAH1A>_BL0(@h-)nin-tv&d{GH98aupG9wiu^}~wW=$hIc6Y7cl z{{B>g=YLliL_xQv!%GvQv{8tsBJ7`w!X^mSKrK}P@VK)YEmc(vFdBV2At5dnHdXf= zs)zc9Qoa|tLds6c=HzO-CmUr8c<<<8y=ym?*k#w!sCMofAH~)Q?z{$>5qz^ zfkJc9oo1Sffv(g})`7psPs_P0T6YYUnR;$)Xe6+v3Sy9|sRCC|@^ST2-`BLhD|=^~ zdc7KH&&*5>^!Cgg)m;knh50K34U`yS^9Q?X>81-oO>m!3D}{N4xDr4j{ZY+Y2?;)S z=a6Do>i-B)7-@5oBB7mM4RE}TrcX%l^m}uR=j-YH^aHB%qm}(KeU*!=J9sy8_%u@X z3mVYjrSwoawC~&Jl(<1Ev^kM&IMhL?maDTm2gw48_h2+00mp}GWu_9k`PN5wX=iq? zhlWSIjY1Ts&Vd>JSfxJ)=?PL0s92|E&VimWG&Is_a=oo&W2){_Q$uaG{;;zeF<4s| zD1qCc4UpP1L@T)j7(D%BF*HLDDZLX}0Jc;|We>yVxBJ_$`?-#DB4l?;TqleHGB~16 zV9iL(B+|@Z3M+jD)^H5+fbayH%|92S5^MpOS~zB?t|(n`b{&RM+!EqSrD`xEG$Fy6 zPFdl;JfT%5FhR}HylVt{gQms9TzuR9H=w zvdW#3S?*Yrt0+it7o0|Y-&m{?Io3_N`d?QyjW8P9orK5`SJ?^C2~n6J2BlF^RlJKH zb)TcKQsbJSrHUXTF%NIoTz^dMu>){_+KfGKbL6iyR!*WJ#*frrKBYda5B1|GXeR%V zN{A6uSSCwSYS^n%82iWCP;R6j2}lC;0Q3Y@1*`$g0JH@R2gI`nG=rs3DVd9! 
z%XnHP%hN3E>uTjuo(3B=sEB+{yN#+e&IqI3avoq1?nlxD;~?$kW$CaSKvfL`;q7R* zMWsc!F@?q&zS4Mmgoj)RkLlsc1o^;Co}A zPMBCnl8>jFGKvPvcW9ohK;7ju)S(`A=O0jAgvH8!G@m`91?-M_Qs&V3IuA*6s%7U;HI#m2grLlJ{AN)vukiys#a=_1H45w+p=SsY@ zl{={$>eF9Vp`0v(mdSMLCL*b~r3FQcWuW;C^_D|ulBFfWlV~CK?N5`;>kEpT71oNS6LXnDp@+j?; z@6tTWyVOpm&^vMoHA7y}avd#~A5c$YE%paJqjho=tp)GxyfoR3xwHjomUwKY{vM4f z7UgX=W>F7*)Vyv?qCRpdMH`F2&&Sw{D9r=12E~c9lqeR^0!6plG(m0wG%_z)no}zaQK-?ICVDQRN}k)Xr*0b6GY*i$ z@S&|1NtHbsQ6S{8y`sxVqzZ;7wXu|?aN`o~GbYeVk6BoDI!z8s0OhjWMH+*)BEMxG zCFn3@H%bnsUf|;f5e>dypmLTV$|wCP0$*ILmz@BcsEl!+Hb{I~q{77aRMPki^*f3C zDC%b!PwOokQJ+7ksHGaxMF84Tyk$20c<`g2c^mhAE$G{p8+c}p`9kI6;YI5#WoU}* zYX0C+hSH3`(3iial90EY9=`OE#g|T4zM`eZ&$QCZpu-+tLNQ)926; z2WW|BMezFxEdk$#gQr8l=UTEfmG&r0RgCUrHICCtxq|u`!{863>G0=RDo}lS(!8wm zj-s{3OR6C6gZKMUCgcU}w_9eQ45gRq(FWBIy^}+th;zkhUx;L$~aFqR+ z%7`EuCiCEnMZBwb`y93p#~yFB;~Dyvcm?2F07}d$|qI{aNK4xQT+I<70^Lwda32fjy*fKzk8O6A-7KnFw&|u>awC34LklLRQOS+UYq4 zZF?7ZDC7g<5Bir?+9?$JD#&O~-Hmo=*L3K?�egE!tu*v~K}gWeK5h%eT<6pQ9}X zpua7m0meeK63uq{FLRa~t&-p_h{Go@P zuo`|G>e5_>QVto8ewr8Y{jj(7Z5l00KwsIYG2|yf9tZv1F_s*s79QM)pJ#-B6e@*z)tPCI#2a1@)mQbp8OooJ;0vcz7%WZ2aS(u z494OT@-2$>Xh;o)Cc~{|Ir+36f+64KU2K}59e6I(+(_5B9zu8XH*%IjSZKxlMb_ttZ@K9#G@18n@Xq^DjMqqaFs)IM^t)waro)a=VneL0@;18zDan z&}Qp^!}dXZtQ>~EJ)XMCZ>S$+iCJoq7rq5_%UA^^XWDjzEok(hUfQy;-owD(=gR>GX9wnjnNm#WT+o`3O`{j1vI zAAQ)s=(F>|uS2NETKJ#BPlcZW|4Z;QKYL_8g`WufG|wsA#bH~GXXj&dpT}g>Gf9t zx*p{&1tbDK1T6+&mcUq`$Ap74O^pw3(DW%9U#C-zEpAX_Ms`qqaYObxWWz0Lyi#>i zpvEy5=zjCSoM$O$uHlQ#O~!KbsZq%sWArhf^Ht_r9gjJl?jL9;C3}$J!%Ai?ug(3G zjHq@sFwawZL$|TA`<1N^`%K&Tu;caoMfGhpPr}?x={J;hL)rMc&%@U?zUq^hUnm(> zW$HZD+!N!2Dqq?0ne?f2NDul_U!hCc{JscRdI0n(+O&RBeO}q`iVoL&(=|6Nt@MFV zwm$j?`r!hlm-L(yw!TW^mgU!dw`?;tS66d*8|Ly#_rs=BGs&q#{)YKb`^<}DI)4YwZCJjEPJ@xuf28Z{Tu?eUr(boU{*UemTP=6(|3 zel9a_HD6OSxP3JjRl3AIhj9Ct6RCNN;=AI*-+auaz}tRm4yETgn7_O0`)1$#@Xg_G z(C4q!&;6Xce*f&7_hdaY?VsWm@6?zKe;e9*=Ii=1+db@W^WALsP9C{=<(_N*oBz7r zTm(Iexw(6e{!hMJw*KaCh7N~Z>p7>3|EkX@`E}2i)VO3A<^=_a6C&Dt0si^OlIGWL zc~bpCV0}dQ)j9LyhHo$KI|TYjQ3xT(|W&{xzVth$9S-LTAnwr$i8N0 z`NI4|$H9J6`d;f$t>0nO!@gIxex}}5a{#52UGoJ!_5j!8gtGIYztvnog}cWBgxNFm zS9Yu_12(+QSIrsJc=b7amA4v;RQ{NYXxm%Gt1y(IZE|J%E1O@<8`3bxg3YL(N4aV& zaDz_o>RXw81mm768~p-SBqmMXELmVG+qh}#a5gM%{D?tpSlzv4M@wwrSVeMoZc@=whRoj_vTlcDeA5tq_YZnoYdCQqy5h0((O& zIC};gEpUQKPV%Q*6hOHt59LLg{Pk~LvCbJ#}>hW*2 z$e!NgU(!qDt!j?dh0HSka|wZQl}5&~wsCQThq0#dgZlSnVevzT4raCbjEIe66%Y{1 zN^7^McJmDyGI}uc$GJy?n74MNa_OaZ|3u^ndNdRsWeij9t&xM{hSAlLBP&#-OK>aE z1-O;zGq_dg1GrV`UAWcgINa)VXyoW#Bgrvx^steXI7;QPP1nWAg(4a+s@(wX=F+ae zc5`Z1ood6tRpsKFcDPC&nTsd6HC|Y|7VUay*U+w~c9o=Y@*=FqK-QG+#PRJaLWGtv zNEq@A)UFR)buNnHzADR_xA2vTg0>XqqSq7tOtv8Nj^j&6Z?>E!5Wdr4E7(f5imhgA*xGC$gnq{G=c=uytP z6vQ)%cZ&1qdu$oQm-6h2`tBb&)JL7&c{8%GDY#?ki|#OvuUYcQWVwgWmAm9_oYIiQ zC(FIypgPweANZFCeYFTm%!`#94k`+%zKeU%z~gy;;(Rb4OrCrwA4gs|dt}n?uG$(jbR^$fGy%n1ru!2djKRKX-CUQ|tX|E__rrDo2CPv7oen7B!kS(1Sei{j=gX zD4UG-oXh88+}#DLa5j&tmGI>>PVJh_=WulflfXA>t@!k8u|@HvaZeu0V|Xtf$EWbA zd>Wt5XYiSP7E<|A2`WjURGF$$Q+ykR^>BRW7EYbBU0QW z6Enj&dV$3#X4sA5Mz9fLlt7GyU*zBL+dN$i5F;~FA{=>xGr}&4XO(WLMAa;0{BC3z zmyBJ;3S*uTWrQK8Jn|0YY@3`a`^he{kn|My#95IlCW|p*kch&m4MlO1gExPGvd-dk ziGKKUs}3*5{n=0K6PC(mvoUNC&ZUZAZCP=8N?+j2t+()97S19wCWxEZihH8tyJR5$+yXm9MA7hE7W6zr#(Gz2GMCGjMldEnB5T z-SGC$jWx@}nrC87HKyeO>!m0|G|(a0d9er~iC8T|?&3GNyW~8$d-&&YcSADrz^3pN z_jSzADn>liG0t3n0-h+Zs&KS;8JuiqL%+o{@)W|7MFkx{8Ez8)8163EmMZs8;O+)M zQt|dfS1-8B!&?p&7NWw0O{c}V4d@mAI4$7>?vtgG?>z#&5-AGl6soON-`M>c zCC5~Hyz^C*_JfB>lNdFuChY^-6yTDC^rK;#x z9M~m0!rjA9!gX`n(H;VaYrVZlbIMHwo*=D&Jag_h7v% zE8puXM1<*(?6R{EA&GK6+$8Y??k>DBP|qHKn^}9DN}*z|sTlD<$2fU-8F-?+qQY@* 
zR27VjC8?ORmGRaOw2AUMg&F~pxC8DktS+hiHJ9X%ke0;Ex+_h&N0icacejYD{~i&n zTf_={H#Cu3ijLzx8LPo6MW9ZhMt0q+@GebZuhVo?J%M+;it4^_lXw!`U9vaaJvb%=r#j4{)5=$3R8QI?xQdZV zu3wjU)uzc@_2xvpk5`oLfV)e^!QI1m!gbbD$^ISyDB$ST$W3wi_1u&sBx7!-a_a;) ziEn|s3$t96+g7;lTD+%1@TSQvf2xl6O5A3 zChnMi6i;=KpR$mZUFN>f!*m)of~tH3=tfN_nV3Emkm1e(3fsVKi%at7*dJiv6rNwO zl5khtB`)Kk4^%o;mP*@}KS%ss9qKBd{YZEDJl=-4=B;r4VM7eCCVR8U98=UKpo0 z7UfoM!}*QHaC&2L9?Wy#Ogw*{iwE%ByfZJsOY%~@w2>R9I+n#3*X4OdUYRcztwkHr zR_Wm?ox+8DgfGC1#5` zI7x0UPEwnXv(y%fMMe>^80X3@!?|k9#R{xzCNF->|;$2mMKZ!H#-H&*=rd#JP14WyW!`s6I0ar$tp@RdDiC4OWZQVGUVh z)|9nmZCDgviBq+*o#%zG>yPsj{3JicPxCYUUH%^Iw{uzNdu5&N#lPTR@e8o;zT=np zkNhWomH*6dV0M_!@9+$M54PVU{)GR||HPc}InMsVK~};L9ylA&2j_m}#F<}taMD)+ zQAiYqwP?favbYF=!diP&g~NK z;@r@);+!}y-WNZKE8?oSCVm#z#SL*&+!D9lr)P-_@vFEm9*W1}H|Z^XapqNFX_uvN z9^q=aMsAQ>U?<}=Q{AIIp%b6dQ+!c0kKJW=*e@)d-DbDgO?HD_XFva+`ZVVPrx&14 z^To~{t$H)JID0Yru(S7O?~QpfdofO4{fFNBrXE`XeYO&MZB<^4*WlGfOVR58+<*Uv z-Yeeuf9}8Q-_V0~pH^05+4?XqsC%)Y^q}tPO8@yn1ERk} z3+8&Ye`ogYtiJseW0p6@s_g4=fv`l1U~CJ~bI(#3t3oldl*I^D9wSploK{g8V_j8@ zb=9c`&ibf@F|Upu^VG>6Q!t`Ur+KuH7UNtyoK=Ujkq_Y1<3n_q(g2u}i zTl$W^Cw-C}T_&6xM_1_@{Y=;C2Hm7vbeq!Y7o6=@8E3gw$ElRHaV}*e)`Ydd`M<3( zql#oH>@YjdPT_>z6*x6-P1f0dd=uZyxA3ie8{f`%@SS`Y-<|!ezpQiq_?P@^%oM-n z-}4`Es^=AcjbGm~Z-{rsJ1l5Q9G5?MK!T;jV_zV6LC;M?Bg++J@Z=CCw184f> z#z}tpML`iLiU_L+!pt%l=lPYwd46R@c~MbR7F9)cQB%|wbwzyyG5e&Tt9WHpE}bIv&{YKG(T}jpX2udP8M~Y%ZC$2KNnx%j7^-zCobR= zzE@7*!|8kX!~^k2Jds|~2WQ?DlO<(2oOZWfZpIwBxbF8@_l2I`&S?%8qc~;b6~fcWRY4D~^*9Hw-7Y*BjV-#1N{c-3r<*sa+eAs&x~U ztMZ*%NadjJun?_7j4t^qJP-a6Cp#-!6gmpp-WzTWUDjEOfsb`Sb2-wJIUCSLVXywF zaQ&y&$SmZ8Ia&bDz)oe;*$g(5&BEM#4ohGQ*h2BU_ycFxaip-|Vo2))*B_U%^KyZ% z09+i>oJvxi`G@s2BHT2WE+SGBg18>fS(0<)X%fhWoa#HuEn4wwO$ z1wg$)4+*TsqkKZ?2%y}i4JCn>`CBVqdPS?8a~<+|i|m-)=s$gmx)XKY|EfImqBF=` zW8O4>apDNQ;|jfF{$~D~?MbyB`L{pwhC9f-?#8nDYHb+5JfIx&H{~iH*Aw$Tu!n%p z%-QA^9exaV*mtM@e9g1wZroos-*<)l2hV(Hp7;;Ze~U78?UDO^HwOIHE6LRAz5X*F zC@(9WGxW`_`Ph8)=9vFJ6m=Q*_tZ`*UJZA*jJv(uvF>p9{XhBU-PfOdHKpk~?e*2r z|3B{B<(l`v*^Jk7%;MqSM%@(94Ws&}L*7FpryOb*Mk2 z_#r}mMYzM&cOVd^z&JP8Uqhqdg`?Z*R5NxygByu zP}P%FYrxHtyZ}YN^AB|V%b$4*J^gaz{pr_x>ML=N z%?sv1^DFe08|GKe81q2(mNl&^)chg)Q<>=w{yp?xL(qc%%b)q7J9YM!Hq+gq*?sin zF;3diqxGM89&gDr|6U$PO|p$QW(^gk`UP~un9O7<>~8kE{|!fb=rZ8n&C18!`?B(K z^IP@5fBO4>)5(9YhjTo2_v?SRKirb=y6u3R{|DP4tAAu+|5W2QhI6cfC_7SFX3CCK z>x3M0s47^WtAVvaf2^l8!<*F>xC&u4r8QOvJ79&dFjfe=z|M=p6$Il6oV|G*?Uz$`D6=^GGNtJ0EW=d6QJ7!DOX$NLZHEAbi zO|@wkW=>6LH|F=9u=}Zp=tak|+BSgBV3lnsPBot_rsGue*&=~H!dlr<`V=#udpObj z0oL_D$4uxEeIcH}7XA{p_)EGdYhv~J5`L8+jtSXYj=)c@Opp^;fP7oN&G4n2oW%0r z_Zk+ly!fqxLo86H$uwq@N98dVBu~f_%r4K!_gHaxPM%{WSHW+rN$kyp!)W+0eYSj zz<$7Q^5n#N@UK(=>%#Z&CER^>p8~N`{1Ce!l$007%JE~In*AI54a=8G;ti}Zn*@1> z#9OiM+zPmocppf74E7}8eL7q<56cT#--K!VX1*CANgTTdF#q3=y$T2LMkohX1&?9n z`#AaoM}Ihn-3#Z@D?G3Q_!(B*Kj&ZI9sQU5OXTns{|YI;=6IWe6~PO@FY=4Pzv15i z|CWD?oWE0Vx3HS*=ddxPJAdyC(K zo6ghW-a&8VScABW^cg$@_^B}GZ#s&D7keLJ`A+qre$_QdLD1+wTK z?um8IO2Ab=_r!{472vA3dt&Xg8gSL;J+TT}1Gwt>o>&j91zh$2Tr8^`ymJ)EylrBdc?pA{9=4fs7dK|(>fmnNAkEfJ=$))v6 zF0Eg3LBBi!Rlng~wXdG{`{{YVpLi~wqZM9=7g!H?3GE}HeQJ`BweUVu%G!9tBxD`D z=ajOptP8vzc2`PSU)Bd6Cc}W^_)p*sWkcYNWFz2>Wn`WmDkIWHaE+@wK0n zEo2MeEwQ6q%2u)!@Yb?5@HVmy@V40HEAhKD?SQwJ*nuHCsyDVWT)o4^9)~V)BV+{J zDD3y;GFnE%?ScKjBhNN93qDRAByj{7=8_A81Uh8IPej21n`mAWx(VpISToY#_j_q$Ee*3axC^E zFgZ@`U%;>Ej0X-)jr0>Gc7MpX)LsUqwegz-@8Au-Md@zfljUUKQzhP*$?0-B+?jGF z+}UzA+yt2bcdnd^-2n6CJnXxeFXsbaAQu2%C>H`(d)0mQnoVB0U+%~L$^#Oo>&t`k zAUJkd;@vmKgQK9~m^=o0PGMhxMV`f1PF$W>Z@T3d@(VommHZ0s*Yazm{7!yH{_=bI zJ-$G~sDN*}f0RFh+RO4X6_h{8pHSizymu}ruga^yui@QvE{qx1u`}xi_DST2X3cRjfWdi9pU4ygiLLya8% 
zdgSo<+sIK=j~ss4Mh<~DKus||HNzEvF{B07-<0LzsYek{J&IWLD3VuOF}d}~;iE^6 zVtVB8ag7|xqRFc*8k-&=g7nznt49T2t?LVE{q6;e<|#D(GuSmnTv}e)HC|fFduzKU zhqh(>wSMPXzvsZJ?{?_o9eBI#)bbu$%lm09FQDa*<98lTK-U{u*L!MR@6_@>TFY~- z<%QPrTx)ruwLI5aUT7_!Lu>f}t>trQEgzt@d=9PU1GJXUp|yMftg36EM`?KjE6dk` zD_w73jrk^UrST1{Hs1!W^uB?0=U;#;?Qg&qdyHB<;ZHDD{RX>?X}gSRyNqeOjA^?J zKgt2U@1gCo0@^OisqM0y+Ahnf?XsM(%dmG#j}4w!x6T1vjS!w#!S)BPewoHoj}R6; zHdyqiV9|D5Zf(cq)^=QOt?T`@uJ_lv-e2o_53S`rw0`%{n%zU|bPuh~J+kR>Kdr}o zv>x~Suk<+H>p+LL7OgS1DQ)hp^|+VT-QGgkfL^cxCu7(56j*}!vuav{wb1&SYke)WzUEqA3$3rY*4IMoYp(UR(E6HdeJ!-U=2~A1*#SEooLbwEov`yk zYTa$f&e#hfwFWn2SJ@S~(&L82kIMm9+T4)cWH;bSryH_6_L@tr*$w>mTujV25YuQ@EdTI^JwT2b)kURu>V0R-{X;_Q0yP+}9;Ct6V{DRuM*oXSQd>^qN z$Pb`HKa?L*0cC^3g$=I0@xbvTlT8=e}`3?4x>*YzvCBUkGzu&QMh+=cU7CTjZfGg}j{E9!nDS_31g`)z1_jFBxdLblT*WG9S@5f~G@VJz&4(GTx% z)VPOHPrZeDgq<2wN7<2<; z&n>kx7GupF{v_M`Oy_Qefb73c=GwL3e4DA>VAjighxw+r?O8ixobS_zX6|uu?rvEM zeXQS2LjUTwZjg)@(6F_kP3uE*HiDLH294MX+OHinT}NoRaA>wJ&}b3RWKqyy(a>By zps`|LiS~g7+7FiJ09c%ZU}+A4g*gnCq~O*X)OOIizOr&?E0*-u?kJ z#>dbUUqCB-4V~~Uw7_N9U{_#!U4z9X*nRUi_Q1R+)?!tO0mGg&xQE$qSOHb{4{`M9 zujWmp!+s|7F24W$Mf5dqily+^nh(V~;Onsps8Zc?<@OM%9wF5|q3}Z21Xi@WnOA5Hxck8T6gH&=>yJKwD6FS; zHy^X<<|8%(FbgmnFvonvKQ|xqFU-fH9^f~??`VfV08h49HUkf1I_|aXY3>ak08(f;6@&5fcZr;te7_kv;edMbOywNQ_JA5Hq-I8+aDbB z$J=gyyzTbK+iri9(ovgq)CTr8;0_=I za1ZbR@W@OTzhhnF55QB{1s%k>h27c+nQn z4$vOZ0nic93D6JFA20v_85NLG0T~4qmqEp4P;nVlTm}^vLB$PFaYJ51+3Gu%9H`w! z@TZ%(4O%4}9D4}vKLmds>$W(9yIZ(Rr=956`OvTPQ80F&lzaIn=F`~qYyJf3y$k8R z3+es;*n97QD6ahvbk5A|E_JC3f&v1{Djk+ymkt6-u?q+&U;)9kw}`z)jlH+Dt0@{g zdSgk9i5fLAxkil|6HB}j6H_#ava|1V&h8=_bFcUJ{(KKTJF~Mhr+m-1f4}EUGqATA z^xX^$ZU!80bNw(+z#y&^WjU^`L0N}uy9~X-A6&p6T)-b(z#m+Anc*m(kFwU#n_q{r z9%Tc{MwCq`n^E=}u3^>fHGV(J0hA_`gD8hk{t2vkALTI05tO4SCr~~{`2^)8$|;o7 z*a_rQlrt!2QOi|O^h}47UOV?$1wrNL>!ZFOvX`*V+xL` zIHuv4j$;OnnK)+Qn2lo&j@@v~#W4@Zd>p&u*aOD`9E%Li;0n#)3eDgO&EN{n;0n#) z3eDgO&EVC|;ML9G)y?45&EVC|;ML9G)y?1-&EOc#;26!|+s)t_&EiXjyJ92Cbd;GW zvr*=v%tu)WZn6kv3CbR{k2wdIX$HS<7LVZf<2YhoVZ}H@Zry?0x8y*`-hyNC%HxJ4wkWiGh;TQd2ZehdhsMSk31q8k8Rsid|Ul&eSh=aj^?eHqseXWX`2bq_PY5T z9O$V(6V%5)b4HRdp3VF9;Jy=r6JvrYe+)QmV#b0 zXbd`hrkn3GBpJ^PM^PX0V9XI~%<4tdhYW+Ze=L@cD7!IkjyOC0L#!P!c;qF}@1|e; zke{*v87%JstLzOsaDAEK0CHN8(Sq|Gn5*~b8T~`N{2rqyj?O*4-vTBW$3efdYxH{C zzyE{>ZSOOlKMR$=-tjlB+<4BNt(kSt`|ZW9{TKS6kx6PDuWZfxeIHr=A3w2kVcjAN z{HeUx|CaT72)Xaakn_%LMe!C^JKRNv^snL{;3J48;opDG)^tMt@or$y9{iJI{lOt{ zocHn120c55zS4JKi`;4alf6)0Cyh2bU=(uB7WwQ7c%zM(o?C=1h*iCg-NY2g=6wSm z#BBoBD`7qP+t4F-3A=E8udo-__X+zDFFPO{z1%r!%`xyUJ%w>6gM8=xxQD{X2pt83ixq>51%rqcaN!oBEOh)bHhxPsav5;rSNQb)0EW<&6tTkjbPsc5{204E@2UN)QLgeU4cc^lk+6Y-I(Kk2#`F?(G@Nqvh^(=1IJErXUQx; zkLG%B16J&4lihs)7{IWcj)*%2nLP_SwFP$o*?5N`y*~h-ISy;?l;Jw}Dfs3YaJRFN z?zT)jH*4hwp!pw0{?QS`OWcQs8t#N)4)?L43QujsQ^WAoC_GgLE#NTd_&D^`k5Lbg zRzAW#bj{lVA0 zIqdhqAxE1-OWy<2=SjGTOh7TCbqiA6P$5pm(3(+DVjC&{IxBdjM9T zh0AE+GFrHd7A~WO%jnyEwDJ>Lxq?=HLMuO^mCI=5GFrI|uM@?zc4*@Y+W6Kqr?=p6^!X6;_z~JUfiZr9drqR9g64DDa20Ky#r%GW`TZL0ToDezoAN#^vm=HJXzyFJ zcNLJofHslwfT!+(&r-~zGx;&bfLX-#xeM@~cS;J^)H+GJK9-D3yHz8rh5A_G9G7(Nk#3z=LP+vEl0U zN4Uw-6#P-h`laIIBB$4#ee6qu$vOqz%v6jCL=FQvCojn`m@J8TSr^cRy}Os}fx@AQvm&l>I< zvrr{iGx)|V92^06^Pk}p!#n14dPHXB`&5TE{z-K>IvVETIx@lW`8vuWdWYc^b_c^K zc9qT}FpFLN+Yf3X{q2wR`5R2H1pfT>?@31KUw@-l3=_az{)1*~xb!#A-D6MRZvC~@ z`ZQkwzrr6lz;pfpQ&Z+YZJVw}fp^S=&w#xn)~!G?(6uSgvjz?08Ld2H@fe$VetS=M z0UQZ4hQe$dhU2C=Hs1%^7Pc<_J=F$g@VPKHKlP%==VhNi|5?~h8z>i|s{jnU4St3u zaQb%0`ag~D%#t#^ggYivXhB}W-eM~OuwKAiAH|IS%OCQ@skLXM056IKPjbTH&BPz& zLFSxhXT18jcA3Cz|#wc-qvdt(h3Y<{{DW3$Poe5$iX z?0sHFyU`AO>gH#@{J(kDSQWGo{5?&36nFm5f42YUC;#R_M)xLp^S{&4-&ESVGVRIl 
zG_(EhN8Tzg|HADJcb??)8*4L31UpJ~)Tdy&$~+#_g8F@lKU6;6?d7t!dA>ayN8E@z3C1eg zP^_bkXkAI0hL!O>NpGy8Eg_|(>?s@F&A2myKv$fe7CsfuJhR@^xZ3ox@U?ISYfb4& z^rzRM-!nya4I&X`q8w{bZLkVeDcXtlqJ!utI*HDrN^}w3u%A)~(L?kUy+l7zEe41| zSf?5yhKgZgxTq1AiYvucSh2nyD_1vR?W!P?VRbWIlero2kV7}3S#Tod9I%#X{_G4*bbh)E0Rgln=+QXACzk#r%7S5Yj)1=mS1gDH zV#&4yS>D8zxZ=DWX~#KXR@|^>v^#N!-=RHekLw=91D~G66E%1dFPwW5Z+!X?A6y9_ z$O6X~Vlyys#=d78%kB$SNl!i6r4Og&M`@DMJnAqylwT zlWIIQj0{67!^v=5sVDWg(m)ze4_zB;%`*B#mhEl9^7=%U?QOxby)Bqm!411{C}FeN z@yJoejvUzi3j27taCX?2Lyb>=y#K@oJ97l!*FZjy?t_b6Ti51bGL)*V7VCOJk=hyhWB3wbX*j3>wd=b}#Yn%$$ik*G1cfxge7`_$0 zMb`3n!gn~oA>6?An*wt4u%E&$_-k$p$i~LL3g6?`JHj1gmmz-z=l6trCSQ!mGRH(r zICe-8@ef%>GEv640_)}QODIGIu3O+e1_G$S8lN^IRy2beY>}s=6qUHgUc{!$(tjGt2sBrEgy5QVhbjMSm6?}T)Z547*j2G_r#V)0IE2xOw96&*Ss9!Cr z;r-|$cENd95$jGwe-S&kfwls0Js3OQD?wo)Xg5?0#d(+*hVyVS9OoKQgYyV60$Hb# zVkCU|(PA`ej)Av>2Q@B*M`^jZ9KWs*SK!xG;wt>QMcjh1(ACY>pidd+4jQ$=9^!T~ z%q(^xu;*;B6M+MEj&_zgbfQjyW54LBaP-YFg0at8df!E_+P3I1n=PfB}sjOy$e)wwXLdF3I@Rd z44`-{XBh0lFqmgp>%_2D#jw_uVJ*+FR$y2wGOV>@SnJ5J){bGVBlx=;>ZG_UXSge8 zxGON+6&b!dFnqOV*l7>$9|Z3V#ZfDUqddb=D~6*y!%-`SqefhGhICkod_IbW){qiw zvDf%|aXoyW8^jH`z6ozzkTL9Zh2#)81;b7^hMn#VFWZB^Ia3Rpo26Q=ny#9x8mk(j zN>$l7ZFHRLkZM2LzQ`^{*-hEbcBOrheUWV*{j@K#skFXnUu1pII>G9M)oYfIEZ18u zwfN2AM~mYY-4rnjANfsre*p2C!bdh&RwlC%FUYQ;XA_a1RB3Ts;UiqMT&iqm@gw!3 z3R~@q=sB%dSZ=x=E?i{a@vCi~xE?DkjNg|d7t4a5sBFjg<-3a$6+Wo%N4~q|Qn3=> zQ9FJO6f4a&itG8lwD8^OKi1-Mnx(1mL9LbM!cV0A;eD(R0@JpF(-LkLU}D21oAcBl zMXA*R! zzUd3_O<#a-`U3pW7vP7!06+8vL>y=g{729X^|{eri=Y*E!*3)4Jxxa`#P>2B8{oIS ziDwxk#c;mKpBowhieU3b@mM+7&lOELr!9MU}%?ApR|F5wGlg@c%UFE zg}J`gtj&dWe&<*lzE7;(wL4CxnOpP>p0@*j}n0MK>QYr z^9aK|;Tmw^8)Tb(3v9Z9<4weTZsT|l$A|Fs|BmtqF&=`fGuYAKLOa2Qc0&8M7M(Fp z7Zi7V?||ZgqBdLrN7@OFbOD;OH8f;vXvWsyPCI4jkIcq!PloiZb=eT?`%MtmP5y^oPJV+76Uc{6%_AH8lyukWMB_tC@q=-qwv zj`rw2dUPK>x{n?;qeu7Aqh|EtK6=oM9yFtMx;vj7bZZTIoQX0EWe&<*lzGT6hC~9D zJph%_EN04w)X<f>g|k4!Cv?dK{8qfl(*X&mTEgjLVqM`w+Bjq~ZtQ36vL6E=kAxm;7GP<^#|s9ru&G zUcd)*`T%tL0Cf5Qbou~!`#*r=P~4_m=RVGVM)?(m?m6@T##4heA2n_c)>-B9CI` zV7@r-j1rBq3hi2O=P}C{fKjn18Ne3g-HEoyPhN`S7Vt|C-0>avC3J@iI9`Hp>N3ue z*~ndkSN|T4zu-v`MFUwG3#v@RF$2c}91C&16vt|mdK6@-37;W{{R*OMG&|BB#Si+HTrb73 z8l{%CdJC=IHMM#Jt=_`;Xv??I@~vlDzJW2{1lHe1wCW+UU@2zA8O}m7eh;bm4W!}c zkcK~?=FNcj=QO4a53Dr`auqq`8a?%-8`C7tk}N#^H#Gpys0MfxbafPTbrf`U6n$+5 zjvWPU9R+P26=jA`(9dQ-s2LDy79;T1&*f+<4mPbFAaxr$v=WfIjUDftVB0F;eNw`@ zwMLI_p-0&54p@E<-lku0|3f_aCnDW+Y$EhDYQegJLu)uCpm`hP{T`kjB_MhmJ--F& zyMx}}0tMay3~s`&qXaj%gYQNOc-#g&Zo_M%1WaxNCf~zfqXd_?gWYKjkBJ@T`Zhcy zN{r$*Ab%Sk2_-xccJM&h!P}sOMP@C+LWY;X8Y8;}s=WgoxQ!9s0_EPpjNC@gFChbg z9-E=J-9v=wCrEZZW`savQ$U~VfLRKLYz+nG#o)UZr5m)md}y!(4evvD`Ubj_1J{vj z56V9eUiBp?^#YEU0h_BhM^-*y^)q1c8!+oPz~X0M)^C8r&%mq)z^dOc-d`}@Uw~Z? 
zFy3E)(+@CPzhaC(1G9d^7=ObU?*qGj19ts}S^E{U_A6$MQq<2F%Y8`eOBe?-1Thm| z0oq@opI-p77tmL#aehxKA)f{_$$Wun(D9gOd-VA__XXzdOPpW8In}DJ;{03CBs5yg zuM?BRe`4fR`dVPVm}d}2H(YCvqX&u?iZ@CuuBG6Zfn%28Tg>xy%=5SC&2{wTTgu)jF*8!hzxJ-u86q_$&rmlfgev464ET*#MN6gss zWy@zwwwUD##cs1qaRW@LT+yJ%5tvOXUyPW3wpGTY;kR`BX2g1m@pt|rZ%T1b8Sbe> zeS;y7j2I6O3v%ljmP0O><^KZ~on-KOKCHg^ zTUc3QT<(x49l^QHvN+ao7w?*2`OZPg8)68$!9r^ zN)+?hLvS93qCqi^`!JK&g{|E6JmSq4P%eXh+HzVWMx6W`j5znVqokbKh!OXJ5hslp zAyMhy79*&2aFS%;dRq?u?5Im2d&_X&GqT}tIr|mq0eHpzRtViPoNR?qTdq$v8Or%> z4etPJ?gML10&A!xa3b8_yu)R16nc!?nILH7c2DFLh2rz^8Gy&&HC{)IzD8bOEL-2bj zj$t^45cY;bCKX!bWzkTU@bi-Tpw!E)kY zIdQO@@B`}Rpc`o&+SB^_jc}Hp- zd>y<|oN!KvFL6LvN3hh7Z6C!Sk8;(I<>%{n#PNmfL$)(2>QRAgCo8j~qGTnH7Rp8& z3oOwspyNL%ouH(opn#+#)HhFb;yZynWazQYZtM<^*G6d0grg#r;Oh|S%|3~X zXFXZxSuf}2O6y3;EA(^Fe0XTbftJ2#Gh$z@12#dnRVq0vOH1qmrQ;lF&m3YyG_-^} zXhOp(FnDhi;vI<%og%#vf^NS=x{0WChs8Lod<-=>_c3fw6ATK3Sqt>@=L4!E456Z< zd_9A2A{V9YAS&YGECTF(Mdy&nNO_2@j__Vy&hGAlvkc9i(S$f?9AjhIiKA;QJ;%7{ zJfM&WKRuJT{&|FDUF&u z^ z;YXPv`v`W2FV-()ak(5In-iWZi%;(w?yBr4lj*{3DP+Tshua?y*Erx`XjrjvFfJ}C zWAFX^qUmU8?;|5ITsl1R0{1u(H5r;cyQFMZcGj%Y(mB}+e6ypXGJSlrq9U_=`M4)P z&CX#zW#Ol2{B%YiJB0sO%*`*JTU0c+IKPk{Hs%KxX*5N_`60y`O>s*>_ab_=$avV? zJs4MuLb`|0dxZSQi&8}$xG_i975^$b&ZTmlw9dYcoShGmyN8AbI5@;R=@jk)1|J>} z9ugiN;^0bWH5y}#U?XG`2qH7jEx>`fRLGQ=QwoeEI;Xh6{)0mNC`8EG%gGrfJ>v?x z1a=SS-@B$iojW#dz-uFhuj?7!FJAg4JJl&^YI)C1FXg`B*CWx=$};!s5wCoq>#;!S zvM!)cUVKGVzsR8SA#p7y_}BU_o8L{hp{Un_;>`Mzz^@|9ySZeI&B|Z=@q|G;S0!1t zvn(tec5r;j!ijX8Lqs1=V)SS6o&efAfoTNIIYmmUX++MAkd}~vQ*qyH$)8^-9}bRU z@t)j?3jwKPx*Z*(qFt3dj|q1K+A93~cz$-}>VIUZ<0n)lB~?v`S7-fWbtRu^b(vHU zs~zfzK^t#NpB+{nmd@RIHMBv!gIL0BMEz>iA8o4NNzUp|Q91LxT%k_l9f3YP$R*mv z#nF+EHa0N6Iw^JNxJY9I1y)~68>QcOs2hT|z@>u*i01B=z>(@5(nDzz>hHi;3cY3T z!N1_FwX!7&QB>NWxD zA1BWJSJsoh($@Ml1EilzNk<7v9gnwuEJc36KE@U7_bTwT9Iu!=p|L+f*&*e)pms5$ z#hou`5^Udnmu!1{MeIb!>>*u6L>;*pfT_rFA!kCvs(>qW#F2=#mlO}}80dOs#emc4 zIe`NzqE^#q{P6x3CBGl7VZP7~$d0zbE2Vb){?)5-#}aVpv4~Gvb5^?rOEBjEaMN*Z zSn`tin3#B4Dnn9ILV~qgTCbB~h5(L-dM$S$w8e!aqhesO;Go~Ki7%J!{PPQW5Jo!+ zbYTs;aK#=U;Qs+~FGrb9ZsY}&2Vl~a;A6o8rYs4J%To{>B7y~9AE3X3v!_7uU$oYa z-!`o0qLBsuDdV>d?X|d}hyOv>z_dP5snr>sT!T~lM5b3~_y~E|c0XL1=3lzcMzudrcnPm*p`uB^-Sj;P1(h1^@1j7zcu(3NcVV*edtL-`Xr z1nA2mJM)*Cqy@Z#?7*XJ@$B$9xo-aPp#iyrVx)|D(BV3 zW{=6to!w{BrxPl6jM2`R{!*gUkK)ZhtiFDP);nNjslV2xeFsl3Ac2R6qb4MztF6w~ z))J6KPr^eepP}LlG6dsc?DTov@%2?jGErij0m^1f%$MFx=FF^^njF*6yZgAbriRnA zXPy|L8NPko#*GX14GqYc@{d6S-)zioN}JNF*QB((1!aOJWn@-QU0h;AUj4q>(*1Lz zr_^S2sZSj-Bc*!dh?uCFRTX)<va_Tb+zI)@gzZ>Qp;)zFEI3u*;-^s?8%4 zVs-C~>APW4zST~f>@nR_t1~U$Q7d6BN z_nw%SF}815IK`b==$VIX7xrmrueDVMw)61`6gW{AXrctFa8PQ{s2Jv}m=JI63MFw+ z0+sTNSU~O(j!~V2WPAH%Cq`?63#y{EgEKmHOdXV-KdYC#Qkg$$aY5b2>ae&`|EM3k zb97wN8e%WowPE0<6V(?gR#ZoX^_!TJGGSnJ?u0DTu&FAfXx>}>Yxc~|tJr#?y3bnD z<^Cet=kAyzKR{DXDVN|4DWXnbRKoP46cxzbJ5u!1zy2ZLzvTzn&LNKu$l8JHh}=kw z)(*I84?komr{%o0_O^+Mor6>=F)~P|^APEIs1$sCNtn~-Pinga^%xwZtzuJ|mQ>eEBlb-hy}qP&V^vt3Zd?77o%InOg=>G` z>$I-_>LXR(3|c!RN>eg9CuL$~Y*??__~I=yd-PoRPQM}h7IYu{kMs3uBme#7n-zHf z$Sm;qTI>qs$2lTErQjS54~0><-W3w1C$6h;<(e=GGi`4}sWTh<0Z$aM_a)-LE?OHR zD3B>?A;SyAAVNYIUnwRk64{w3BFV@6?v}GcxW0R%5YBIw?i?qRMtv#WRE{Rl7_g@; z-VJgYIa<`pQ0|p090lHIlt2`1I*>vli3;guDk5~(#|ph$w(}=n#>aS~IwH*;bBVN9jh1H79^}G1Fb?z8 z5qmmf#t7-u$=_P%;9%>*)H2F29W+$<`X=$wPk6TiUyZDrsa|cZ6yEC~@y<&Q&JPV6 zm>D-jOPct)gQKSIF3Ztrqy0XJiHwSgiHeLNYs)89r-T$&q-P9{^6!&5a%=CBx5o5e zH8VleC`~XMHRxfT6fZv~TZPPqA)H$4=qQtA6*=iD1`oC_E%oq7=o$*aO*uq}1|MMY zj8_0{pzudKX#}|mx1yNf#AH#q2~Epr67bPcj!{vaDF1MjnQwJ9J)IFq5oV~U54O{$$7)61hw z8(p3sL4Gf3T3Z@cGQEKJ*I&pf33E)YU)FQQqYq|h<}W#=U%K+~NAt4$JL!S~i#OdM 
zK5xH9e13SVr02$)(se`Th&;7N(%{VOqRBl2l1FbDG<4U(N=!JYXPt zf^$*qvE{mIokfMh%GMTvYa1KhLf|10EQ}n8%E{PRK-CCSuryeDYM8rJNjffhe#8e% zC9g;x!c;!!o0hY(kVl_0J1=C~qXNGD#*Gxewt?l%MQd*0b!f}2s~@jc6F)ztjUBJ+ z(7wHrwM8e6wuZKhhMl+Cn_4=Q(RoAZ@MioSs3v$vKlvfr+hpAFJEg@u=}|m6BXLT( zbn_Uw+OTtyw&c~i?owVI$tsza9yhsCy2q38U575sA9s=8wk~F9PC-@3qaS5Eg9a|G z?*B@Ot-i|!?U;hXvD$SpmR0M^<{qk`e4g$nbP@ES2BlfKsq6(^Cr)Mw_UL$>mywvrd4NVyRX03F9WG@EVl)Uq+wCUJ} z8TU5zoATkD%#8zg>o@kLw^+W{X~ZOo-MG;{VhuHf-1+bD|oFe$j>vybeeru=+z<>&NI|&+%5)*1$!E zifnYi4nP~Dpmnk`?3m!IAI<-yH+dKIBR6j37o$ePO~kBS#jFKTSOxg|%ZxL}CN3QF z2HDDLqjLv4i>VAu75&TXMLS>wSKE(0_v)-~UhS8^^76}Xemm9im~H-|hN|V|A@L*D zl-4w6J4rkEf9adM49Y3pO~|VwUnSh`(ov_T^_bqH+x+8`M!j@mLAGz03&~theU*SG z-lk@TCwhoo90h8g@j3=f=3_C^9L7pvNohbVZ~4LVm^kpLkVGM8_A6nu`+=zAS$#Hp z;XWb^BTT>=f6O$ExR352_9^ajqB@Jcjg7)mN8LKWo)GFJ0s@&83A45<*c=^uZuacY z#vV(b&^>QLCfOz`9{;)IL`}&*7nh8UNttFEjf}NU`AG+@yEy}f;iTqRfjso%L&Kj!uJM`hycN8xuX6&T)nx@Rrfzi18tx zUTdkAcW@ANa>{Th6`&3n@>Fy^g|&`Ct14yIrDRq`YrmQ`?W+yFd#(R!+N`hE6(5PL zURK)wm718Cp|A8WTUr$%$V*<6TK1Kf?<3-CC9e@-e|h6Y|6wfw#YxobONo3Z@g()cuM@s)V#=E_K zbL`lgeEf#&#-iMrg_H&BPEK2Rq6RZH!eAwnWAp;#MF_PdJF2J}5+GOsRXOM)fB`kZ ztMDF9@`8{MmJ{7{X*v1#QvkWRmI7pHS-)3?03b{IQGmQDeLbFd^IyL(SWI|%3UFx$ zx$3KRZ0~L<*w|QGa~<5;S?I6~fua|>O&N|+HS}f%FGQK*X&L7YicoDM-@SHYT;Zyr zQBpildJmbOnfh|Q^vyA$SlW1!)W~)Y{&3OY*GF|wZOECN+w-N2M?dn*iGCdPQocXDdxjC~ES-5P&1b5UK*>=bqJwEmO#jET*jeP~qGhnof-jjNiL zS+y{SG-!Hg<2*a|sGixY*OK80Q%>rNrsgCKOApS~M!EU+9zLg6za`btP)F)=hi7`n z3;`Tr2#Tu!M=E1L-&k2g*`bk@nRGhJJirh64-fN8AFnfV3dZdhV&vQLtcuxU4svVo zQARP)Trk=vY>jd_lNm4EJ-YPgmzV#v;7HF^gR55c1U0K)yIfay`8DBEi(0olJ9{~N zmps*$L_O-J+7d9IlUoo=NIi?TbX~=G266Qz{8aMJ=lWYKq%CAyyoDNb1K0SmRj8$1k*FybkU_NJ-XBs_ZasVDc&vU-&_M zskSk(#ZNfcGJ2e_V0=+a3B-Un=h2lVvW^T>mPw!R*78PbZS8?;5tdts$+@5cUsicL z@YY9;NOR?ltN$FoT5$kl>Iz?8tg$b)N?l>9FKV!K9_hJE3u4Qmx9t%szb03e2{ivL}>S*I9+LU!IlYtz?+;C0Lu3PaNTWYVVV0>1mC=M; zc+h^kup*{*eO1l6VaYZ&$-~xGS8p61bI>!jwnt`7c96Aw&ZGgk!_zv-xku;E^M0za zCzlsx%|1M?Wc=YdSv{7XennNdxGp)guqOKF%xOi5wak-L3aH-$)YVkp!6)ae(22ka z=4>|lD}mz>tkM6`(%_R)8?t3PW#f*Hnz(mdeEhh*6Q>@o!-bUkZoNjOd2~n{H8W~p zV)^S;y;pu%=U?~X%HB2aHKf(1q>Y3rnKi#5jp7IOqyF3mVz%C*=wc0+DRhJZj0q9+ z2lVHQ@Dnw8ATv}ZgV}8IL`O1Xn-`)FZr8U<9J!&Qdd=`88|&oZYikB>7@2s`JF_;o zWL&nhO;A=%W**$m=Vif&X`|NJ@CaRq7pBFpjKCCcCV({dF@-?RkJjI&Cvvy~Ga0lt}Vt-{yA==C=G z6(Dq7(Jiv2sn_Na9aFrlRFUrSy#jCR_la+h*-~s_u}UU}^iS0-T7=oDka`KfVBAjd z;U+M>f%#FeTy?k6lqhZl?h4GM=vmu zJohqC?mFX3?o?tp@Ge#goxtgM1}>An3u)nG_VD7I@fw)3nN2U}6Yc0Nl?9ChLJx+Q zXJ(8n3fc|OA5gYpxIUi$xK~X`T=fhp%PF}HW!j}3XTM)zE67bvouwc{ z;6Z zS(b9Z%!&amJRrH%(6Z#!(MCHf>!8p>>LUGN_dY3zm0l_*Ek-~|agsv%@uTm^;KwI; z@pU~`Wpgl4NpIGQ&ED3jFhU0qF+NB8umGCO1mL0wX%P+|wnJA(KG?^{)fzwrC7Pzt zfqHcCVXQzS{BM7 z4LulJla*Me^%e%TY!Krl8{>qREUDIq@SpUmrW5Yr*YCB)+~9%BtM#$`$pKYe{d4e! 
zAtTP2`t7E*b>#ojV`gQcH_z)W&~As%U-3>TZh7ZPH~xZmbf4--v`K9``~4jptaWNR zlcY@eJYi2J0fa!mQQ&}c8M#$SdgH#4y8YAA)2Hp#>Gn-aZ}Q6@ou4-o~{SQnircd9eYxs*$l=PXCa?xSh&w*08aR2Fk8s(6v)S;`bvOC2gTJs{TEy{j- zOMP6!rt0c-by^#1ZQX{N!JF#i(a*Zvl1bg1twXvE&CDB-C%pGO76yP8 z%z!K!QXHnOhpx)-iC@k7=>qv}E4Q?iL4db%7L$(3pxN2vNmeL~dJ>Rn5B5TE?dqO|7)MKCmBKb^DiZvV=>%w8rSCTb_kp!Y9OSxof&k49);2YC zO&W?*w27o5spUs87UT9k_?~-y(BQ43d(0@)GYuuK54pXEPqVl@~(uv7m5PPq&($#iHg^BOAj6=hd_u*rPFopGE#W`WY z4yr@>V@#*fkEg6aIjlhQpjdp|39X?ym?4B%-MfED+DPgyJbLsMsg*W?;f*Ep^>_7G z$VzD#-wB&*09GziuCNpJI>93XdT@17!3Jz6*V$P>MPbmQ6;q3u83SsyQ9X&qkl@mJ zp9Gzdy4!{24e#cXt|S(|isZhDE=!18=qyRsAe?&a)ups&q>Z?e2;TW=;`){YAEx6) zbHFpC0SLR%u2DL6(AqMaQBG|}ITrRMQ$h4xgHhPsa*uzgj~K~+Jhn*hv3m7t!K)=_ zuCQwjOe$Du(o%ktJP~rfGsa`(VQFR6p@X9nfh8D@=m@+z%%g&E65Q-?%teON{H&-b zCk4|y;aG{J4j-DX#E(~x>tDEI(a1q5>vpK;u1g6l&0VykP(47NsOx-W(z(H}mQQS~ zJ2!Oyfx^#zQ9Z6B5&*X1I;vii*GuIU}F$yRR(={+v1@4TGo;;@MP0Kt;v&#j5iOb>|&>z~YbpFKFb zTS|CXR5@U=Kw3qr={pJGnXuVS6xd%XB;@K@nxUj|LeLz>D29s|-v(uyyh>@6sCwKi z%MVOS3Jip@Cm@5*8rdb-{RDo45U#V#G-z~R|2M->sFngBX z9XSSanT&Y3$aFTgwmi=gdRWZJ?7gj^*_*UDxzLOXJ`(DxF*(QV9TbN={#~?`mg&n8 z$UfjgVq)I)?dsq%`sCCZ2P%)I)h4A5Pu@VzZykLa$LOBb z!6{Rqnh!PHk_`}l26n2U3x!~<&Cm|uhGng*9OkIxqaZk?Kfx5y7^Xl`CB*nm(R{S`}o7NbI(ZI@gW4BoqHBs z+YWH;#Apy}qnVu-q;;_dN-G>yDhnm`P&qi;=qw#tGj%9h0Mut`W~Wi|(dboYDlZ6u z-+}tWghMUw3u!IylOd%3Us8`r(tR@0W1^JL*Bs@)oiatbI(hOH!KLMGp=ZmDXrZUR zcgrKZ6`ALjgCpdddSknX&=3-reDxZ2??8QXpu8c!si{f&1@GE>LvO`D#4`gV2XO-O zQA4m#fTz}}I2ns>14<*qLhZb5dZc?eJ1`-tVQd=Q3_;lc>MbT<7=Q5|N1Ft0o9FpM z#iNsai{@9?y)`Zg89@zeu8eKUnmcId>=duOc|#lC9iN;$=5^^_aczWG_XWf2-yVy* zcGs65T-C2BVbHkv@;O;P1#_$F-W{(^XxLagHlidhBr87J%QIu(tkU8+6KYJv$W6LvarCxZ%p8O)M-6p$`e8| zVxv4gv&!d`b{p3-7%-|5f98LZoq%+7rJh1dOFI=<7WgN4N1N=w&TY=C%r+d^VKy6O z15D;ySF;%>aCnD$ZlmWGsc#Y z2M%Ino_bR*aOb}e3sRpn;i8Kp4UUMuKArq{;q21gYlf}s)ob0bHG7s0EJ^Jj9Na&( zq)bm3eSGz5Ny)2UF?ty_;y5A8mJ#ydh!G!ROOR!f;rIx?_l6CX?~WQZ>fOrq8wT}G z9g{mcZ%k_M?`Pe9yS)7E+p}ihd~4vqw{FhfMC?zG9C=#0ziE?n|BOy|hS*d2xs2Z< z*aO>K@orG}yT^F(ii_>KM0%j_E;@YP!`#A>*ftTPaityF@>-^iMtZBU_3! zIeLs?Dj6>v7Q7k%cie|01f=6pH!Dq6Fy*DnC=nlSIVL1G3Eo?`=1NHWd zW41j(ZE5gpM{3)9wQtu^1P3N|U2Sb6Ba3b9>~e~objdkv@@Z-xn|ms%r;Hb`NuJ-QbE)h8y z-ChJ?2)9_~Dg?`Dp2|}>xk9kMLiRemU_3WNILc1}M%h94x7+JvbJR^_sPrkceh{Zo zXqf`p(F}0_6cEjf%+VPcy7Y8iM#kvO$@x)HIXO{L`QkC;bJPE0GBU9>=PK@qOn z_}r|G*|W>~H>QX5j^81)Z>%MD{;82Jp-FkM*<-WfhUP?;lteBCbdvcc{5rJe2L2ML zwdv4~Z~xaxcjAaR&L`V<6Nwrbb_O<1J%oEsGJcdoYd-QlXB^&7L4)C zB%Vh0g~+Bi9lbNU_MTmqH?CKQ&ZB)23xfNN(z@$24jiEVzDIgqcXUyAb+2^dT{At? 
zJ$mq5e)%Im+Rt=!A_Y>{hEwjNp)=#c81xm+T?QhGh2BES!9JU=j2wAo6LH`zCVV+( z_Ls={!`tlnS~5Y%fouVWTY`-|EmM}q88gb`_)9FMJQ?zt8!hFN?TGL=aSr>ets0$p z4kk@_IK!Y(8ig^1+E>k_HM{_;XW7IJROgM%RRre-`DDdJTIKZ#$&dCN?i}dr?dOE> z6gP=qM}EQ>x65FOo_G51aXZvD5IzY}~zDP=n^^8W#I~bkT~S zQ_^@m6;dyW85HJb(u*i?f<|Nff} z6egC(#FQr%CeQ=6PU32~HNpg8FSznDDDVoh3_C;8>(~WEwp8|xs3zA9m-*G4nk>i8 zmljCrh~qD?I|^{eNp{C_tb?||9oG&2!kr58jo``dXuusm$_}B9<>LmY1@#=9kuz8mI3Q!> z>wWugA3tE-tT^BNzGQ^iG{O2+?60Eei+L2$dmGLk`vJ7$!nEx;S62&#vy%e8)-EBgakGyUA|qeccleCJH|K z`TWQFPO|ooZUINickGbnu3jx`N5jnerK?x-BM@{1jYs;DDAspXS%X;a^FT9KkRNBKR0?h)1QX2_pk1cfjkJJVzrH#@tl3Y70F>zL{bcZDgEf{fh(!`@9R`r;ip7~M%D40e!hshn0?*TuH zZ~G_<_}ndsu03^vp*^Gi2mxgd%EA>!!C_ub)*6=ptf&i`F?6W$B>h0_kR^TVwpFUb z)tN(vW)6C{A!_^}cYohmncXV_=@tJd|M1??8I=)Y-^c$D5uvS*$zCzKV0dA7O?01v z{%%p^rur z?wXn5+?f-)x@Nd$*m=RbnU|Dg4meZS4Xh}ZUEPYaPvXvM^AH`7naV5?N{7f-HtLJU zym?{{1-i17NE!+>#gd>sG1nlA1>OrQ&*(NH;Xu;J4dr9s8?-=r+-Gb;(16USkptsN zdHXT@r|LEp#g6>P2>$w{e>SGl#U8b^XrU^JVWd@;&Q4kV*d zXioODOJ^1*jjM=DtXtN5@J?cJc#?;-bTFyI>}PRrV<*moIHMYz(|$K6 zta}xxkM8P;aQhQxov4jzg@`GfG(cPkS23u?oy;-INb^y!KX8D=g=)2-!O6)jQtMhQ zYHv-zin1m~m&S}TB#t4Hz;TE*LJu;gMHzWXYlO5FaDNTO2w&-kPU2VTIb%qsF+IKU zZ$dI+)1#v@t-kMz1ZD(o1>!PYnC?R}-w@jku$C8EHWd~NQC$fdTNXyX~6C5j9CJj z@HTk@Gzvu0=h#sL2dl}92pMNT%4acJX;ZsZjSd?Qkgt!$vf z*;<*>pD2@lqM1VkQZJr2ifrDa6=DK%aF2=ukBN&HFP3&ZlZQ)Vr@fdD8nIJzY#FFU zC#+YKF^J6cS9C|)>Z?XSY!nJ>q&!>VA#)s;B~!QZaBl(=&=}0^*(6NU|NEy7`^Z9f zsc;s!=xI;#Wvzivw!9Tb$BLn{kum|n)VCCsjEQY;U5OYKNbldyH-p^1VU z4an1Hwo19;&nDI8QEN+g_bI!3I zORK%2Je3Zqc7cfg-5#|&Fj-gE9iP5vuYUXh+k`%GnEqPJIK zpIBszbTWm4<@~5)i@JwaZLV+8H>@vESZ$EY{fiR?F1IGJLtOQ2coMNLTww{F+PBpg zs?oZK1PA+tchT^=u4+FEoyyVC#my4yGXy{o`o05J1_37kD;mmR+C&OgsHk9y(G>@# z5LdK{CI}<^Kt(?#EIPAhg^pjIzcMX#MUP(dDl`$5^Llk(k&?DNfAX2-1qI7K6@PA< zsxSM<)?%ZLT`bZxrL%Q!jEuFj*{D$V+4S{H5<2tiO?@6+G-F4bT>c7-ii2$T=d^eu zy;|#p)wx*-NYF_ravBh|2+s*S_-J_QlwBi7?U|C2GG(_;w|h$J!A?01 z`S}ewojP?JnV;8?+ma+FYfU>%4q?JlhkUwNh!NVDUf9 zJ4IdN2UsK0E}tQB2j57I$=lfq7ZqQAI)6!(R|M9)WGDj?z2ADPaP`nmS)E**G#>HY z{SYO7{Fss1a?pL>L{YX#hwLdE%cU||$beKGs0Vux$TrAhkUQfIjTsucOfK)*mBY^d zx=4EpJ|?dmV$EJx7RifxG3!En_-4;-Q+t$l6`r;)_ARcCG};$5%N+K_q%3M*$YZAM z8#qThQSbXdSQu23`9VA=zK0QbAqxOeZ$GH|*Z{}ARgVEY;V(A!{6%ybvGaqPDWn~(Mu9vYbyOlK9ZYu^Uez|<%G?QG`t zek@jRGNffwHx}<-RXs9L8vb;;ww!)`B+>_=;4Kbq1( zOfGe1=fDC`Gs}mu1EXC8OWDK@uoVi&_~rCB7{3(U^qZq}eG{s>byer-a_d%x=Y%(j z1&kYxJpt2%al_Sa*)MfZ81`~OV8P476B#!|pSFv)1f(#-58=EYd*O>jXtba%yl@)* z!c;q{eANoyNGIOzkF_gTtbTyBUj4Z}5M}40zgcACWgI=q7O?&Q9-)wtL_uhSB#DC-=Fp8C*%r#056^8 zF2Ry6%Ed&ru|pQM26_6ndSiHQg1DDg%L|d8hc^@KaTOaKtc^{xhfP)tHDlU3rYFQ! zBo3I&J_e;|G%2*mrp1-TOza(38acXeU{*$ONLCgdS-8}lECc^ip>_+qMwW%Zl+`Mj zy2dDpDVv6}P!iECpnJG)fOmwQ(k(x}us%{38{07>KhV32lf^_kkBYLWVU?&Aepew` zb_aciyhby`%28|Qp6w`Ww8i#6t`vII&xLS3c(>8v<;uXwu*ej$VJ0;H!IZd)L``Z$ ztiONkl-SCIfRwnTF8*<{JJvZP5~2 zk8Nztl_&ZUc-Od{>AqH9bqac|AMwOlZ7{UV$9>&pcSHsEdJAI59MPAeg=O;5@P63u zb>_K7*xBM*-bLT^=W1UM<*pxvM;!EQYy< zXZYu3SO6X^YVoE)F8k30ONG!#H$egQuv%Gel13uiq;uaAY30Ln;!R>DJtW}CL?0t< zm7EM?+WMn2+CWerW5)D{<|roN4v0UI_3~u2X0?k08k#-z!hxuFN8`5wiF(3x;?Kei z`8=FaOCQ-YPHt|t6j;c_peW^j#^j~KM9bPV**2_7-V6QbU@6wz2R_OiW|d7N7SbPn z&qP?k4NU%ZaD(xT8|;E56bj2j2EL1zrszcGk2AU%pcrEbVk*}3ZKUha(9@2v5=Z0A zxH=kk6MdmH%tIR+8Be1>M&*U=y#endvZJCkL`@x~y#D41zIp=LA@TBweERCg7i3)? 
zU1X+7Xyc;-EJQ)=HIU(SpAw^A6SarXm5fh$HcdKQ3hI=-kZ)5ss&5gJUP$v00#>c+ z{NhRnTa|@(o3yTeLdpRJM(YA`_~d}Jt#IH^Nzvzcrl z9pF(_>HBl;y)%>Ed+(Fpd+&`@QV1cW0U?znLqg~T2uSac01^oTBB(S46A+0A>RPs` zt7}Q<#?&oOUt5%82&gQvUSBv)p$RSNQ#~w~ygp7pvvLzM=&dMM+`S zs4!VQD~TIyEwkFmbxpWb18c#y81u-+JTkTOFq0uS#)L-OP>*gz(>G8UifyKpDSd?x z1A8IaPqfU4L`4Xh6NUWxiF$60=CvO3x#-$6JUlG77#`L{+iAjgax=J`ofrogA&i!! zY!{4UWo?a$B0aUMi;IDwFSUB#X7t3qVjO0*MIwSP;h87|Zz(7Th5+rIWmpir7a#uQ zx_1xG+(XFI&#d{?!SY?=gn0JE`qSm1bvx^xTXVW1q<#;v9zR#S`;BGC{aJH{%XH&G$Kppnrq}7-u-4$5nRGY9;Dvh1=)XoG2SCri0xt6pf zDCz>HLG4N@nUc+o&PS+!<%syVv(9Ti7_PdW(7I=q^NP`h;$^J5=DW*xH_X`c-e%53 z^WD5dtZXOXFZlP8o=B{ZKw?oDCx{-e_=y@$-XyNu6lc4 zY2Lua*7S}dfAK@|#hvryuiX1Ry%&}xB`v$q+tYt*dztZe<25(GU8COd)zdXKPk*&T zz53gmYfQGAlx@GIc^XHVp5^&K;a&>Qf(@*(G}O`=2P+~I5}Yjz)xNsyZp;|vVu+71 zvz5QEP6iHYZJi7Pk~$Z(&UBw2+cw-(w5mSSU*9mXY2Up0N7`e@+%no`C7- zwdNJHWx8>ZM~rNp%?!+ywyue8A^DMxcMn$t&t7w^p={~Pn{o@cz5JMU;l`%8 z#DxPn>3Lf^(=wMFCLd0G6CCeuEO1+Fj67#7tKfX8R2>ofg(B>T6Dq)5Qca>{J}Q-y zQ>Zg(K2_l~o zwF)krPimfCQoK5Y1n-?0R~~E?UY)b^>Rfm!n75*4u!^M34NdmSgOj2&&(puCExkaU z?=5^kXG5-|^AqO&?n!=D!J%PxVMl)Rm`(Ti&Jxrrb=&m4vd&uWP|~cKZBw|ZB{8vO zQ=v`j%GNyNq}I)NCt%t0e?^w0D>^d47b$a3PUd-=r~swf2&G@rTR|vEyq;n(NsNeX z>{P%Wxx~G+9vWTNAbZ6K&iLcE%evz|v*x9gF3otcXx&IzXf&+8SUPMNuDX8PSLZ%s@9hI29_@?$ZW~=&i0?t7*p6= z>=#(RBp;!akQx4-2?2I^mkzO?e-*sU7dvE~s&CB&BJ)9VFH+YCBU@`iRb_067CeMy zJ0cXMV#)J}*b;CLzN=guDQt_e@JaGtm*1O_Q0mp_8er>|5nVm2FW)_O#eDG)?>zB^ z{M=HpvgX*SrqJvN4}-=OdyDh0f7(ahMK%caM>c5v zkxqzf8rji$83gX&k~Bv=oyP4TQ9+qgy^%OIRMVFHkCsL8olnfC{>M@3b1d%4ai8XM z^sPP8d*_TWGk<=9LqhXo)c3jqR7<@^(C>k<jza9RBrVqmR8i+#C_nJp3+x{Pp1lVS~v_pX-i^sotk9 z^u067zohp-wMtceptr=I+B!Gx{*x=v+RF+$$*J}-<2I9p12@K|77b(~-~G2Sv6lM$ z0i{U{3I;ft4{(bUpdAayigrl_286&1oSR&xHbq)C%XnwOGnVrXb;g!J{kA2{q|ptc zB0Sv$_|U+?9K{dbCMVX3qp>p+d`-w16DJQleQTw{Fu*oxR=T_R&Jt7i02e(MePWu( zt0q1aij2rJcEPE>cTT&j%n2D1^qgnS;_Zm5PoO0sJLF1A8%^R8{xtT<1sV!sd#>iD zrta<$az`cxY1)x=29a4lc?juwghbZH_juj$1b!r?Ze%4egF;8#$sE7k!FxubZ_CCM z@1m8nw(nj%wlO!e|3YU-ZN*!=x34@~Q_`Q;IdgF`NhtMfa|v~F&4`Vkcf2pH{NOL; zNA?+*8AMfthn2>=_C!=K%|^XSuz`iCg`y5Kb`8tOPBBe^OrzA_zdrz$u{022I@o{Zr|UGvE%fIDMmq?mI7% zd1GAu{TqJpAmh{1wjDII2T?UQ$mkGy)?EA)s6(N$Sqi5?qX4g#;^R};v&w}Fo5Gfv zXPw`Zy>etu&$GSp(XEH)H>1?a5Uj;22c=EzS?(_Btp(ZhlAQ_ilD=4bV<4mA@Voui z>X)|_WUjrmn0R}ktSN56#zN~^hnHk#b*)dDaK+pfh+FvIV{SpnfOgcEGw01)j73rD zDe+Hw@~{N=?Io)$NancYW!E~+MR%0aBFy69CB>`qNPMYhqpQD-TV~9W54uv8ofC>` z&aTSnNM%*N)jONrtb;0MMyeZ}%RQrGRW3dbMxL90v5l1j(;cGnZ6$WdP8KW+r+T%& zA{<^^E3F)$>&0koxlRCX11<$5t0)}8CcH)?L7 zcCCASR|)SOWD>CT`8J#RFYYRzUcWYDQ)^;k>n8G=RFeUu3;wr^j%bywf!x5H(hv4l zn;N2}pa-aj|LsH{sTp++g-T=Y7hDxm&=D`hA2=}aZgjl0nZbH@%fx|p?949oUimkk z??P9=Olc3!W6vHgQH)kugV+X^6{@KwqwKpzU1IVB@;hhvaM=+zTq51#mRwjqXYaxo zAzsr}(w*%#WA(wpO+>yT`SF*!7XSLmx~PuRy%?`b^pI0d$Kulga=AU~WjstVBF(U3 z7AKVnELJdOH2h987L=0a7|m2ks90P_O81O;XEzpPAN>C8nZNDJj8~V>PssP=gPWdS z@$~DniSLX>d2SvUGAesPY{~AMJ)LhpQB!yP*NeNZABh`??B9|Jy{ur>wZ^hHI32 z#=d`^+x~}Rm7Fk&z^G`q=6pwZLI2^Rjf7jCbofeV@4F}Jau0kxLjLsQHyD38GDaNO z_$lBUM#R>@z%SZWp^nyW3$+fgMpHLKYot)^%7^ogfElmN1!Crl30NrP8`eDpB^0$3viw@5WtKT|X)41=gmei%ET1X)$p0C}Qle(da8;&iFviB~_ zvS>KDz)Z&9MGB_SDSrvv!UlEduoBGl^<8~N>dt<;s&nI`ICEAS)^xztGD24X6@r&k?D{iH}Q1QRCC<#HQ)Y<<4(GGn#hGgQ)8D>%y0(i#u0SG z1I<5+TKYb6-pMosF~~$y$}>z^4r82T=u^#$N*;-dr~(AVJVL9Z^Y>E`{mVTnQaIiY z6_RgwCHdG#RxBy5+*m_NsU*}lF~HGSD4j^VkvNk#b__`L4K2~qQc+)dl)X=i5I;0T zsj23fy^E44IsJHt5>sGFdw=!=)O6>^>9oYl0`C4xegPV#~Vhm&0v_jNEpTGU|CQK1q~8I@r~k=q6gN6ANz7u%E~J%xymIu zZuz_0Q&umtxz4?L=K#0mDQgd_wzu~-pIDqU=wH;HR<|zG!kuR}J=`)lt5n;h*lRp= za712sj?;Ixx8l`)fuPZ>Sa&sWF;gqB+Cee#La{*Uzo}6~)l|aMOt| 
z`c21yq1Qz*^qk|XmlZAFGlLI4azq@_0_p8KczY5^Z}YXDJ(Y*o)kPNWp)xKUA&La7 z7{zD^W)W(4cl}@+8^GM_so|mV)TSd)|Btk4lK+D@f%_vWCRqSZ6Xq>t@PPu{!D;RR zUy;7+?n?K>`mGI-zOkioUe*fkZ<;P%k8D5skvett<)xa>#z|euGYs*1`K&UH#aP< z!Wxb*8%s+aE{?=MSRMtXW)}N@#9W0r6F<6+3+@k}8y^Q(TOZ5ne4_2@%DIm(PQ`?N z$X}L~wImPoNrvC(U96)u^BiHP?TMYj+}zH|nNtJ%ipgT;%mgQ+s25U_0`=?_ST?xFE5+y(J>Ne&Ler!=EgfxG~QC^3EW6y?afhfrY)<6Q=eS2D5J-=f9-0 zk*MsPcu+3G=+u*l{7`)b;napass^Y?vcQRkOj=BL2LHS<)VWuzFD2Zf7^z*$AXu&OF8N;V|}qS7H1C&(daZKCt(KJ6y?o zv6>%ar(e8(rsZz4yqEh3qZk$c>^;QBI=4{|Rb z*R12cu4}5tT@*gCxie=6RV6!QWJKu0p{3Dqyha4fwB_(H+w)5Uzk6|aONPl2r`D(c zbrS322)}(aTPcb^LHl7~hBoHxqTym*VViIcM_j%lr+^58{{$8c3|S7pqrrj@&`sR&Q@leYL3}gO3Y;OAGlZ*-!U5 zr6klMNwI>zb&7_iLqTLNM^A3yvnLmAO3vKTRlccYylB(K`MuXRd%Sd4{i1`V0c9&nPjXjQBrnR%TAY>AnLBrXOKep0p{5xF^W!SQ>$lIL z*xz9bk9L6uvX&KO%uA(K0bYsGHLEn2%M=_Cv;^sFQ@UpUcI>t}q3{f9(Db+_;`((` zBw%eMAVJD2LP6F4*3uKTw2a`3*HhCH7Ceq_5X->O$~$@xBkplGp;Xhvr;ck94_ z6J4~p!Y(+g6q=J11%;HeT ztLx_-RVR1cT;GUqWWkRgD40+6tT-YcV&{f>BZ0b!0E!?0@{<)^NLztZjMM8GCD(E+ z`H;o4J0})Uo^?!fm((3QrsG?5wc$t--oO(n=LPSxrCb+6OJcfId!%-n*7|{ynV~bG z8i6X}gX8HN=QCy-*08WQ=g=oh`0jBy>czP3e(+ic(_vEw@Y-9)$!iGNfF93*Jt2w( zu7tVE^-=j|h@n`FL`or)#$rN{PX630*2CP`0F@d_rS2+-|1nbd!&v3xOuBb8kbPhU(8y!*L2F z)!D~KDA1e{k6bsKv1diYj+!91)VfuLUE61vUN`Ok;>2Sg94sw8@XP+QU$5c4hlj<} z3-=V3?zq~vXYLbgszXcmHtqUoW&MTQ2X`I(@qGOnYL>_@BkSlRHa{hzSrL#2mLbS` zHSw}X*b?P0_9$DDG-9nvOsy=s@>CJgMqB?EvMO0R&Y6lQ<(<>;L>~sQiW7hx(kvJn zGX2SrqAVUKEWpOvpt=qIc%UH2Wco%%_7JZ$P)a=l0N$g{K)S2IOkma_llrJ2qz*H8 zIYcf0OL1Y{@aa{59wxsU7yo{ABxhSQ`WRjohusgYwIB+Y7fcQW|WMPsq0^9f|!;ktGhv+6Hoc;q%(vC zIg&ZXW3@Jv6zAYGh7uR9&8pnn5@tA*v+iPhQCGr8ttWeubWS1*M?>$mFBCVePo3f| zQkXX`PIfc3DBSkhiJF>GVs)*{+SD^qbj=^Q*m5t{VILaqRDGy!u=a4ZO@S2bD6q*~ zUJL&tCxdw+_`w>aHASe}=ZIr~pPw(mf2PJ}3bl)~vnlfhGmTHGkAlQy+P@B~6*599 z!IebGL-EMb!)LR2gd-l6i{stX8`8cRB_|i2=uP%5Qs?K2)ior)uqh&=aCt%N>Eu&)CloNM=c1e$_!9wzB!=Ei-^cm?TbbFM|wd|p$g~yFa%#N z-b2c0P%t<{o#Pb5%;$8;9tq8nIgWrGoeQAS)KLMfz|AFkFK}1YbCADes?}<&nBX%D+z98QEnw%xaNBK~(N&J)Z zhs4f7yxB*dBfsJ1iK$OoiZ)yq&6I~v?#Pqmx-Mm&lU2d1_$=aJ&gdaS^+;EaOGvN^ z4@N1HsUa|(aPoBQ?GbvU)=7hdF-4EW`Mjq`!Upo>++TaKu-Ye}wm7vpX|(&LuG;<0 zF_Dd1#oxcyuy1}uQrF?ymRtR!$(?yc%~7HA3SZ>pYE^cK|BR5J!q9m;i~Ndv3u78* z_-A|PwqzD8D-X;a>T2&kOHD&Nx}K_M(ppT;Ne(;zFzKgY&IV^q(0 z^VZld>xsYMqu>MdDgW0R1$SMILggsGMsruxIK4~S97J^=hxxD-pIW3q{R1+@VW@XT ziU&|F)j-lx83Ft^EtRj=e9t{Ms`+%xN$DC@yeK!`Hz3L1(G>J0#R4=xEE^ptQ$5M^ z9vQ8B8t6OW{L9NQ9%^2Z8;JZ9@KpuHTr%XfkW18Vp?q}#jSB-%mWl=t9(RXyT@YRR zMYjvlqx_w*ANWlAI`{tRR*Y!hhGnXdM;0Hl^Ump#ULQ^+X#p#uk~Au=CmXe~IGHb84hM?QZwY(C_5EAm{- z&1Xg=8>%Ce>MEIfmbN}gO4HEJO7f0zDsu+#rz^^Y5}<{S^F|~At^kA*%vrP%imKL? 
z9b*nQOT(R5dU;Fuygl{BOUuIa^~1`R6c??kOK}?oBIK;t#Nt2$qu`>1?0G59cb=!Z zLs8-4l28Nv(30Mw`aSa^#vBvsvlEMgjSPZ{lCm2U90``$fCgp?17?jVl_<{kSl{WMTKHS!SY$blUWolp$=SwW0(t)DDK=< z9{o=}l`KcZmxXm$UnMMKnlnatB#g-nXpS!`yuq8bd$kyEM3veYrtKdYCEtzpTb$UA zfMqRz?9QzrQAN^*@VsX1*Gobo_MYLbn_B7X6L{Mt98s-?pN2iRM2OGR11tv5)MjAt zFO4pbUA!j0|7z1{bn}edHNDb?il#%>?v}G(*g_J;SF5|CEZrntA*Z<#Q?+{>2Z>z4 za4C7fl9<%PG2?AxWb5edm!v66)21u1M|@e)fhWn~SDV!C4XAdHX;Uw&6v%j}cCYBb z8r*4_e73X<{4x6Udn34jTU=)}tkdqD1VQrRviWN3Nhm>sm>=n%`%&~JXz0BD`MHs> z?_XCU0^$w&{kAzzESRyr5iy&I+W4CAu-X*79pxNrF!mJv-cE*wDU)mU z)AW1qd*eoqV%&Ov|9U!Z^(c4ifnhl#c6hN%7@Zf{=uE$V$jEr-N7)sR81o?m;ysbm z#D9*MBt>V!E~0Y=_1yxzPo_Uj0Af69JA3Nl5;}ceE9&4?ralv@RdaetojK>1UT;^Q zTb7)%>}>b4mpjg)2MH35`AgAt$qkdh#DkU1=Q|Oy%n1Seu=LgXB%RIhx=6lwt_1YY2AmHAcJ6bX|#s zg{390FHm?org2K1W~R|o!Nc?|=A59~JZhSy5I<>tU3~3z@-BIGRQ$epez1Ra4k;6# zox5v(WK7FJPS8xyqs||XDh~=QjT{P}y(}kZX>}Oz!T*dZxX-}Hsn0tQ-XY|GgogM6 zz|2~vwlV-lu>{TsM#bVzU?w!q#F|7h-w2_+=0WgQy1~597%f(ab}=>0HVsH}6Wtz$ z%uDh$>&Y$6=TpiU6tf=**2}$ul1+|g%OVa-cRYY{RRLAPoUyx|Qq59UBuBSS0un?D z{s%(#WzGBKD#cklEAN}cSzAU?vd;n~dj>_xcFtJLHG>H_{SSaO9w$Vm*oYmKq6o-c zvXlxvkzu63WPLwf(hoMLpS zM5Z%lZOtMFJPRaDY8G_1Oy7n8khm#3LyzDvS`8+QWIu$bj&UPUYglr|!=}UoUr9EJ z0Zp-7j9r|yDA49oVvdd`A5DP|$ATvHn)Uw$51M;TLa8_+KF+jd9x@QVG?_vH=|@i{ zYd1pACA2}PS2}ML`5yZ6DJr-p4iAZfd{`@>IN9TfH7;R3pioe&NuaB~+6ol-) z0VPO4mfeEWMWr^*G?m)aMS{FZJbyB_d+#dm^LS$Wo>{Z@btJvw8dn&c)0x8U9A9*8 z82}gKANNQq4@+N&Sk|3STZWq>Bb$d?1dltPrZ=X$#rIUvAGckpx$i2K)oX!D^&FEY zVbV+uF;hb}Q;BN-uQLU|=f)01HLNcxTHg>o<`9zNlUN(WC5$$nY0qC(6LG*PGTYxT zJJK1n>RY?LHaHkxxDPbGvE?yNVe@9tA6K6&n|jqLraTsNz55a9V?tpAJ;;&TuEqvH zHZgE;bTlv|YJ*9&no=y|1QLycc`=nmmwW2YPMaq?KQ+$}u;Zf>zNAN!$$edavaRLJ z0)@@(bt&^>q84UxYd{{n?8Up^C`M4DOKp`{5XP|QyPOwLCx-Z7Sx=*ybzti?mRutr-32ud_;pV7S}A=BVn3(`zrUY_E?@JK z#6t5c)vVEg=F;d-;iCK{rr-$FIuQUR0~t^K0_K=C0Cv!f4u_GJ8<@&baKO?ESm1(j zC#*F!&{+8e4SG@hHSU6#c#AV$K{~|@`EpLUt4StTUy@thM*B@HkJCN`jjEq~GD7nq zr@~ki$JvY$xX8Yvnu0 z)sHpLl65a~8Vzh(0IF+95}XU;Lqk5|0s@zfLiRI!`2p{VC?$JO>c5XnbWiKtMtI^rmhyDZGwihdiD754oMNWq=*3aJF!NJSX)z#C}(idqAmc+>m^*SmQ z(a{W@8l9Q$zsbsL#w;)`8kz=fj5XsD%<|Xaa+31#o)3q}?+9TEaqbZI-Lk_vqxcBH2$8Q2Zod;Qm) z5x$-1v>)ewf&CdIUmW~!@pZaC&U>cpkGSmK{*3d1gM-`vrSjV)KYg#1v>$>p0eeN? 
zm1fxZvTBv&R2Od#PtTwrrEg%MW0>|`sawT5G*n+-A@Pp;-&NW`?fGRY)lc3<;dc`* zRioVTpSqV^4Q!N8>8ZJM>nH9jFDntV`P0gcpvzQQo-82M%Ffs*HB}Z6sGpUQk)E5@ zpJNN{Atn4%ZBcIuGqV`pkkp_PYR#13I_fv?MO;~A!WOB^4B(SRg=kW{r=_m6cYuU9 zpY6{kI*xLfUd7mN-_I;WxU*?l)ZftLqmL?0z$ji=5@6WXZ^OjyaJe+oAhCskBmOb#& zc%lE>ORptCwNLb>rS_d{+P%HLFWLo7EyQ1M+ueO`SxVZHqqSkBnUimQpvf;`ZkmTr zetXV#)bXJV`i>CqU$QtizA@i#v2SjuTa2HbIO3JvlwPnDtzW0TwtY~Nw|}y~l~8y3 zv>^(oEzx0ZvKrAAR870aSs3}XhP$e!9_7?lfbW=gzfK0EB6UiKUwspj$S_TN6${alz)Y$}RH}^az>whH%6n zwdpbWt6I1<+Lv$&^4jru^-~>Yo)+dFW{aL$9Dk=Xq32Aep|zozi%Hw@t~k64MMsE) z-eV2BhV})rFm!KKIku`)hBBL0dW`QohUp) zX6uX~J+4+6T+o)D)m9Ls&>U3+6|@UQQ=4v}`v$N;h9au_GBf+CBPQO7oVy~2+F?}h zM*IVraW~+5wxpJhiG`lr63GC(+Jc3rSv;L8HptMkRkC;~WIXNq#yv(j_mZ~!U}EGE z;b$eT+%LXDY8v;83bKUvYhIf`PCF*Y+*t}s;5CtA95IiBV1)Isz-dM&B3m>TurW{@ z$+Zvx6ztKGWdiD|lvslSI+oE9r^316&QbQ(a$8d?KMzCk{pG*5Yoe3j`?3-2x#ZY!d+d@=?)oEgWdRJ;SPRw^SR6fEreHW>cOkN`XS zeHAUn0gGpR@TV2J+5pO$J2H)+7>PX$V6mTv<`48b)(+%kjq&HbP^-X??qv;p`-^HH)4Y_ei+(o)3(#74Rli2?x zPm{o`&`#w{b`H{H3uF0LM zM_>P0n>uQPB9i;pEJ|!`TO|$^xO)zVW{b}z)x~IS+)1Oi^)l}Q<2Ny}W8bXGJ?)8) zd!^5g3NDOubBilfyt~FH%qJ?w!fR2xPq>d*72}(us%!EoN>vZ1t)H*CHKQ}%3wAJh zMl7f1acuW#H5Whh#)9GDIQ*!&r9RyqORZyi1GSmGkh#yr(@%j4R>MXl>o>yTuj+`J{$8 zIE19i7q7wkMaAkCvN*;!*M}`+*C5px;ZR6*qxSgMUsKyXV&UP1#AD0l>cPQ?rbEr* zr}#aBn(kkJP0e|?)W&q;K|g{M#C#%3wAlq$j)g{H9jVes`8aUzl|k-$dH{rRGSw!C z(WG%{5hWT#BBQOlwWo3lb=~ueJi=QNCcef85b=dmhLaTZXt#-CsJ(qv+yBLN!b_eL z`FZv}ank!3PJJH*-iM}1z$ftk`##jCGEU!z7cZVt{uJkmJgdEZ&9?uOgN2hV5eLw- z^QO!lb_k*iHWtRb+FK^`B6M3Qmt$Sd7g%en0cj#XaM_{gK=LEm1MsTUt~)mG%;tv3 zlKpS5JpT6qa-Eb6byjSu4R=ebQeVEl=}2PNp=<58AN%6zxl2AefZpc0Yfd#x6m9FR z2+>S%fPX`raUF|0Pz@Wiur@bRs)5AdLFi&mn+1(5Fu66YL<&wcI*lZ%H=*jO8--D4 z(>k+gVU&C3q6?$zhbjlBbUoD8L*;Dk%0MSgLUX8kAe(o~lOdV)&YL33mhyC7uJq>ofXl)UfHiF=m#$)!`Di@(bx-fxSaW)sh=C;mjd^Y{R9-!m&ui!YNMXI7jc z9cNaZAv465Gb`}?i6-IXU6p(Slw*Bafh-6D2zcc3G6-HOKr-B|RWa@=+YdE=JV`hw zazgI4ntatx=QhzLx4kTAN0_B4^<+~w0DwzV6?FQq3FS(iV*i4D~KmA$UnZB*0T*};cW-;RdPvwmc~J0 z*T$ggq6}E-oibS(f<<(iQU?Pv?KjJ5zwy(4n||l?-=^O={VAHI-Q5k1-CaS^QGr~E z8b7PM17jis$E3^nG3e&KA2ru?cSimZ$DLL-=P|OG}w=Qj&N>`-A3l z-4DOi{pqUq2jM&2pB}ge>lGl*BW?0qkT?{pissKcqs9u+g{%l$(4MCXkTOv*a%*pE zrFRtiop{@#QIcGR4k&yKiLtd*YEEo6TwcF#lhN{e@_{iayzO8tUX;&j;7D5Lo%RM! 
zyq-noHk$8C<8L;Y?@v!O5ua7?CS>M>&sL+Ajf3XMQkCFlppScGjJW zd7OA9hoUQ8K&JikN_)Xa_A&5ORI(8gM$(B=W+tq;dlkAR2o6{?LzTpo5K>zy3y2Je zq@Oz?9uzLiU%}_Tl|CmMx%(ITIey*b=ZNletF)h+^!Tcww>rukQjHu89ige%+8P@h zNVP@KQQo0+|NrUgRp0iJT)q5UDH4GG?E=korPwUU{~?@Q#w*Y@$HELsx}>4sgDj*I zAn0%mDK+V21_#p3iEk!di^Sb=Q#zkNbse40zgW`y2mW2Lt%>Xt-+8bBI;ep5=ksL# zTRr$zB4ysGRx&=6aG~A~kSuU8pxvipGTlOg#*pSh-G34P8yJ@RkmPrWFNh!guf~0o z0I_6)xMd+(!WI0427Z$`v7GD}*3%1h&9oyIY6f99^2lFgjkB?VjCEng03F9y!E5ZY zCVgBf19a$@q85w?a$yfXo+0m>_6&uZHC}t*8GOdrB_zJd`$T{8zIa1Qf5S5_h@T28 zv9EHREXWohy;xdI!2x1Tys(079+tqXkTUzE+9<$?Gkl;Y-rxSo;gZC@^SxYYXQm5@ zY&wKy?5v_=;@gp;{qJvD_vW^|y>Tsjs}~$@jixgf|G>Wt%g72@oHWyj5JOuB)RDpl zqmW=UB?N8+I1xZV@Zllj4wc#}>c@i@5-W0<=S&`keEtjj5-W6xgQ9P(zCqIWXhjdt?rZ zdNOUqgNh^}9(Vmsx=Zf#kh?C32joD>5m3yEzv5Yf=Ut8VT(4!H2KGE1gRf_w=FK6| zq%lCp$Fci&wLU`X(+^3t1tz+ok$}EzhJd>@1Ru~->q((8Dg4c3VF+?tDQzI@gvQ*c z&mFm!${6B#9Ayr^`}cERUm)es#+h8y?JG9-_q5^%+~ys zJA7S8*E}Ykb|%mOf;6ABb5F|84GmEbCf<}=ODkZWo;ZtF1#X8-zG}+*P8}7&{VCP; z4sK$Jt4tcAU<2@S0S;l{gPnp_Y=oh`O?Jq0c z_p@dH+<*64YwI=5-e)%c@l5^k?A1@tdrtkzuF{!%Zm9oLxou%gT=UkMU#|Ye{!-Wn zi;x$81c;mW-UyyF`bNy$V~!KN7IFn(DNTDJs7I4m{>%FaD8ai>7dUfCUfz^lX3bk&ud>%!HcZC21*L2ddf=TZ)5J=qB4%d!##=N`QEr%m1*`o2WgP zzhUQ+P)Nf`IpL4H6zufYNOy66y#Fpp=16fwv5%czeehcBx4yBl9x%q`W=50jj^_XA zcpaHm>BN6Ck)x>BRy)0e@=UO}@9ryk;$^Pl%W_*5g+Z@W${)ZRj1A;a1(ZrNfu&F} zqSBr+kTK02y_adpeW6Jt7GvbQ(X}Wf5)#1DZr+!4iB%Z{PlH&8T>M-% zPg#{_tV*4bh`WB&u3fTpmm?rcwepXjtkGiqK&U>+^9BaiKt;9|fUf2!Fgd`g(Mi z(0Dg^BcTOrwkQs?Wo~a>?#9DvgrU(2SH{yfNcF)eSwnsB$4e%5j*pYq)U-JGUcs6e zhY=8+y8D&<8Y+PFfpmoYCb=^q&dv%aCj&2(f`%CAoJ}-@11>0O8g$8N7u1Kf2)XC! zpm{nJa3f_Xa#7{_i`D$;{eK!RE#H4rz46V>SrgH`zW2KpLbtgpIx-^^qk&P9g!2i;W~Mjgod1y)&lS?a>aq)eD%0Q~|yVGnP17uU- zgxcEZOXXLzCY<4PB0 zI+KW|!TBt!h=R^M%7 ztxxmk4WNJ$`)1-dn@PtI zGW*a1st+O{Le(VAzLCb)4n%ESH_vj9ZG}3cX>~m1x}lRg( zA)R&}2>yZ&WZ*B~Nq6zj!7JWwzqt8{n8Jp^XfksH zQ#A^dB?dwl@xP9^XO#S73>_0(qMS`lvy9QTK{TbvF9E1|m zDcL#hwiM3PouGlF1P4O(lj%bV8TQ($PR;^+$tGknCWugaga5^hH3#CLp zA+%>QW5_h9uALQiG*1>2-ao4aoanHUEY<)ZW%tM*m*uU$*jD+}W2K`x{S^(5&S{#wlbdS*NOD>MV?uX0oNS?EvnI{y^A?vH08$q~9mtsTn1c%p&0{L)wv$r?{%$WX^XU=1L`lAMYf4OZL|qk8>t)DeD=gA6jy>@JY2LU zFKF(e2Ag`6?b zNYjXLp)KvWT(VtU+%}MuG|)!suWLS&ur0WMY~P+gFOnmArnaVI7OrUb(EHNECN{DC zaY8bdm8!}r%T$q_-O)R))qxw&zx$PQ>;V4*)_EAOH*q|=eQ{m(DNDS4QzFaG#pqz!-3_{g-jQAHuWw+(4 z5M_6H-qD}q3OAgtYkRy(hoxGz`e=Pk-?hhbq$;2d^J0tpXZ#~(VQFakl7r%ZgqQE6 zrPfE2*ycTR=D#_=;mw6xf4(V`%YPEP5)FNXqNKP1(+?8SSEWMq%OpHQUmb63!jwR~ zBui;z5>b~v0}cWCE!l{?5~Tnzh%Lc6;2&bp;H=K~{`V!X+}Newn(C3axKuny7A_rK zmF5y((S^pYh+EiwmDF8qvEoFdzh_2pVr?oS{T?n?90E8lZ{^Y13-*_B%ZB6B6>&8g zp2Jbi2N%RPmZgQ685P?`+QE(sxt(3%mOV z;VZ}pXH<$o^GZ%ia>T0|`}=!D7^3UAR#1e8u$PtOKbjg%dQVl7@01o5)07f}-W(R*Rw3S25Tng6&-c!4%i`p6-ouR45Z{>CiUc=osN3@nfjCON z`(|b|Wqb3XwOO89kED^Y1HSoT5hWq}3wA9CpP3qJj=&1Qt-mI!aN<|co>_RJu<%$( z*Qw4J;t)SKtspeht1Y8v257fTe4nRToUZWBQp8k~__&lnBOGXVH#b6H?c?33fs5b{ z)p2w?N;C9y7Pqvccu&T98N#16Vrp!E;lWifgFK3C`}kPjj@0Ch9pX2{@`>MeJI7UY zX7YMU-of77#QfydB4Zc(Yc7t8)FqsKO<9Xupe?s-D6yxkq$hQ#_Qg%H4Kvfj%!~{P zJg-7W-Kq9Xensi__D(f(90rQ}(#4wnbKcC%3!KvsJw$b-W#TPZ3KZil0@yythK+S| zb;S-Q2N-B~kD>qK-KdcON5SOQ#pn{Duy1tt9`aTwuGhWQTjCGzRswvqwNm^7$}L4f z!Omq1`htz6v9I@*BWVT>wl6!{$U|$%=X1(h@mgdQyA>W;5_Etnrx7#LLd}SZgKf&S z<*qJmuuF(Cx3tL4GOLR&^#9lNx5`34qHgRKtWZMQ zANo@F*Jn`WNfO#q7z;HNw%m`6!u?LOj+;Gh8w?S12PX8aPweajg2GI8Q)n zt+;b0q$t)UcHyq-dyFIh7;^IvJ>Nck#dEso&#ki>uMt&T3(`YaA~DmJl*>O(4PhWVeyu3Ps?vFR%REvo?Cs6m4b`v-c$%uJP|K0I$t(ih%wR@Q zHzRc?P*RYlrd>}(Fvri;-1HseE{@p-rl?|P(e?Ad`hnwQ_8urNSyL0i&4;V-eP$YH z7cJJ6giT*H;#;*HTEY@&Z^z&@3!w253qu6ry`5QWCoB?`gsiWVwx6ezwuKnHG%d(% 
zt>ojl(Y5a#Dl0qm-rB{_r6;8Bo;7FZ{K&}pJLlA{%j7s`+bfnTj@$IH_1vdE-#`=_ zK7VR%V8B*)&)n4~YsLT6J+&qmOv#;01BTwa};y!32${r14X;APnb z?OAS~dA;k2|BT4k6>;%>2^~ALXARzH_iw*(V3vpLE=QNl?k!pV*;~3ZA#J=MS95W$ zX~zcC?O0hEIn&G%t$$rhR@l7h8+T7P5RTvJCm)U5;J{|Jbi@Lh*M;5C)f_zny&KZ?KJ@MzFOODlnHw6>xN}bXvFhu!Puy%; z{j0+j`8$42GGAWy>h^-7J#Vev{oU^Ca}L$5I$EGAAE+7S&6c^Q&dZ2v$n^!lRoBSE zu(*yVn&xb;53h)uKQN=|P)k(f3#)QFQ-F>YJv-eKT6L&6L;ft&s$m~f?O8xP1=je9GTHud+rMFemBO;wxfBac*-^e9%se6Yc!IiFOfnKkTUuLKs=9umWm<-g!AdVQE)%2Co8qP;2W3zoM9y;BX;qS<;3T zL3!z0OIKnhp@I#`giC^vmuX)8WcjFA>FY#C0eAy3}m$l63 zqM01cyh6fhZ<{Hz11$OE>uD!SK9?r^!&9l-)`sTg?5jL|?RqZj6zg-$@3KQ@KiHOduEpic%BG2#{g5%aQHh}B1GKW#{L3q$$@!W&pXoG{-~ z07L50`MT(H*t3R5x#9^={*wt$`MLj`Q0YU>tt)YUHHKK85Z>ZnZY*`yJ zeEd=^>nbb5n+ppw{rnQbjcrZPCSHyrD(J!N3%*MTGmLndeNVT{6ABfov8mgek|s5A zVDF$}o5HDp7d`qO`A@iqpto@*dVB%QHfO`!xf}Dgdgew)=XiML#6;zIa?SWt)vAn) zm9y^u6PcO0y0UgdE@?>kfIQJ&#M3OH@@)Bj&wmysk2>nkaOo>KzvVM zW`VqUIT9%J(ANmrX%bSsDvIz(5s%{B#XIMSs*lKbh#&GVh>M6Z84&x#4zj@*&#A_9 zOtsGe&(YJPOhgKm^buQYf+{U*Wi6WbNj}F*9LAMs?l5;%eomp}04>9= z)Zm^|>`H?6N+qs5BOk*(@TjnR67Z>$?4Bg;m15lU1bs4xhzq+XpWDanXIH={fL{eP zNNy+hrM!gkSDI1iCEJEg(Zb?fN!`Dom6f<52pA|9^5vnlsZ!(%D;ruTAHnp6CX>(+gPkKn20WG5L> z6iU3yTtP8BrPL3QIE^SU|2%4A`0~1emd&7b=Ian~O2RaXZr zTOU`Kx-7D79DIY$6wf^J;4`&6Uh8F4#kla~2z{tuk@Qf%hN@Wp2lJx9`e! z%Xx=;%AI;_=O1_j3n*L8ZRLKHkAdRs(V5!@DHDdAIW=ihq*w{%h7p!hpIjVG{H8mu zUZ>Yu1pDMIOfBwBJ?)him{ja7mua^0ZsJx_zM~~RWmCH-kdN~!ywes@2~a9-;D!lk zf>MdN3s+Vk&;9S@B(8jhE4y(8neW^gavoQHr@fLQZsZQrD_orRN;K}dN^2IIOes%@4#jccyPjg?A zR?O#dnQX!VGc39L7I#WkgeylTuOKf&_A;&v(<^lMHGd`}ToYpRklto2v$Ri=-fZWQ zPCI4iBa406#=Dpgf3?@S$D{Y`O3Q)gI(<5AYIj_tv{a(`iX72C%}mNu)yhD&=jik> zb_7WoOjxJ=Qhbu3(qvK>jr|ygZ8R?j!R^HS=)yhJov7 zYav(bLpB&mRj`tWmeP3xycopN28sj*9V6@JYat(1k1n~EY?g`o**GK|el1hs)Nogs^e#aYBnonbJQ6k>P6p%}or0w{o{M8H2|BRqufOi;^Y`!T86PFfPrOxI z`xYOodGY4xWCxS5Om!v~HB`Paf5u}Kx4)F1gO9iWXFZ%8yjgkY&6rpVeST1B9F4{% zpunf^{#DPM>CDlP=StZup#t9(bk*cMu%a&bcMwHKctGN2Yz-~Zn~x`!C=9|E1nTe;&&%JsT_%6_b{W~4aejTq^Wv|s zM6D3tiesVr_<8vweyMu$`$-zO*M)CpwfvB4BL*i1P2yF`mn4j>!rh3#w zfauMCMBW4ELGrNt3FhzcfWN>LuoUK2dRBVo1_1DsEUFX4AL=JH_jG4& zbE-a2Gu?#S#)Xvh&j}rKiYbW_Um4?H)ck6cBSP+$wf?av$2L7VWyZG^IDmDy%EMWbK^Lu$r}MjWPF6jqxT6v!f@qGKy=1ATar{d1amC~t@L#E;l05r--totTo-IZrMQM@oKe9Q|9HgLBYgacnASR&GvIB?z1T%9%EohGq z=EWp3r8lM@E8SzDt4(n#N&z4&EwQ%5ANe1HC+dSanda~0U#(lS)3>X||Ac(_+!tMQ z-+P|?^`&5qRj^P1qy2fVb!E=-f{L|y!!57xSU$c;^QsX84HsnYcfS!F+5DtDDRhVu z1m2Vw8uLh^!zsWK0O?~j2n-m+?x32gVra+?1&hE3B~j$P>s-#uWrsf45HdIPYW?2% zk=tl90!^u87ih|=o)jl8*Bk2X&f4_G0Jrplcya^8G9qi({NXz*}qcsqO6$6S|L+F~WERK`22iyq?CE z`>JDGM#@&Tc=9^uaN_s9y}v)N@#GSYWcSa?SyxV5D|WxOcH3($n<+lKfGz-kk*|l1 z;12I)7OII65lB~s=H$eM8mnF9CV6=_COA#hOM+Gx+HY8U*1sAC0qg*+`Y+WBOm?^U zDRzM+)Kbo6Yx;^=vsPz8i=VYJV{2wkNQ$dlYDi8xF{9=HQ|#~8`QMUMS(mJyteKli zsdk^;lO0tSQ5HgV`&k;_pQ5)D8inQC6qe{NV0GEv&W?B)L2y%@kFr=@irCb?L%e6y zy}&TGp8E8-x0;6Lhx;NFkuTw!ax}IHFW)?z+V@lw_tore zjgbLmJ^6kMKm6H9-MQY;UsMP9CHh!8#?Fk@*k-jC`eHwhK{(u!UkAUS9ooaNALap} z!TucYXyFB9ywpifGo5Vgt^njz2fgDtHG%Vy&1 zq4FnHrAdj!D&OLy#1fTw^)1cWvA4J~E&$T(xAPVruB<$~aNfd0m6eBnHZQ3rBBCZ~ zUSdsTWDPNbWL|V<5B#}1g>I^wHX|Mz3pzG}CIy5LeE|s^21w>c4_)|_0a+^rUYIbG z(8kSq2Iw6D!37(h%4W|f{?o0I+p4>E?d;NA*f(b{KS;KsMs$ma5Q?ZKJ2Zdh>`TR1 z@_H#o9D*F!3%{o`G$Y$o0}oqU!No-B2(z3nly)<9%%4>y(cEz>76DTF=5TT$tMUI9K9 z%JL@`-2GlrCVyT~K~whO_R)LE33?CieV%y+=Hre$`ElHlAk*V^vOA~~h~9CUJq@K< z+NV`xWDUDF3Am`zy>8ljYp0HkRcnP;G6IO~e(*{n+sj`jvxstqkKF?OK3QQLI|OO) z4hJLEle%n@kr&~@(R>LhNP)wj4B%j`VI<{D1oK={&B=JZ(409faZ$B%Ld?7?)g_D0 z4O9jC&s;K8qrSK*EoIrW{e|6mUS7G~1x4NYUUHx84qhzGsM^FR)0Hj$Q+S5S804AI=-`yzvIf9Tr_cd3Iw(oK 
zEUk7xtZ6C+vY>=#?tH=jV#0(IHA#(Jz2<1+lEFcR&*0$1Y5R!_JGi&GU+=`sPH|iL zzdYz?SZJ67hpDcDtB2)qMGH-t-_mUIO~g|3�PJ}HG?Q&#wR5zLv9qnRi zs4mLL2$16TED;l5rbq5sR<90pk4NMl7fd+0Hp7EgMI{TA2$WKC49X#_U2q|po0rvp zp`-oDzLGFwezAfP*ugo{To#NstpG=R#Y9jxyi9~=c444 z#`anVfBU)Z^HNe4?JUvWcYxiuUwdE4&P6FH^V;Xy`#aROH>M;n+6ij8dsdvm?Ld{D ztIX`OqZL6(^moV=VKwQLCoUj%f)W2ctn~<)BdYv`egpH= zFqeL>01%qPC*cmCN-e<4;Ps7&wgHwn6fCVRLRDH11^>vuEF>Wt(n{u}Bvj<5tqdU{ zE7S5T5>jeP(gs2w`4{f>mm(6l05=!k%*2->;#Dqg0bHi&i67}jRc7Kfb}^7c#tknU zl0DA6bxVV&GE4l!W1M`oAcPNVQx zRBvO0YWd)uZRFK$Najl+Xt6`aDes(nx0cM3{T-f5g9}1GKyYczDD_lm0}!)v@>{&U z|6W{tDJd^*X?7{KW#Nkv{;jZv4?CaA6j?rp_s7VPH2m8*S!S%h!7>#phR6L@&Bs|kQQfA>~y3xWiq&odhx%`9MzC!$t9f)CJTtx)N zr4fH0nLo5g)c$!o5p}S?m9_M$LAJKi1gHeT9^l`USxmAM`qJnR^N5ug8Lc?F$Suv) zz&gqyp(5~G%`;evx(mzmw&b}5Ezg-;96D1yy+OHviK^M^JY>1X#d8wzXGHbAEK zqEolB3ewKqPdjzGcoN+rUvzQ>A3pbouIsef|3h;Ivw!qbPk(_&Ek4e05_G-8Sbd7P zOg%qXdk-MLTcS|R$;sFl{hjUZg?1@RVib@@R~n2QwwjiegVwO_5BL3$b>bDGnf*!4 zs7rTHLaG1g%t7v^=JKs6zMrh_OiB*qqv+sYE;F+Vp9>mcG$VRH=>8F?r$SMouwXW2 zm~5deEY&d~JavBk978q9%1c$MeS!?tE@rSZtqimss@eL($VF`ZZi`i>eiR^4VJl$`c~yu z)wuY$a7Q%bS4TZ!Tx=taiA_#89A09KXIoqK?gi0tE&JxId$IBVG56+iRbAH}_}%B6 zdoPm=GRdSO0y2ZlAoDzffFn2pDxeI4Gft?9voX#_j2g{sCXIrbY^Ek@o8+Zw``V^y z`b&Cm=TqTIvpyY@c!0BTy_@ALcP_xh4>VcoUYUVH7e*R`E?Ok{}H(eC$+N+Omr9qON<=bC;&h%BKz(U+&vCYw&1#o{x=! z_*llCmG)EelHFb7iW_n=J7-O;J6<1An-ot+_Ls*P$oJEwGZ;FNp= zcHLaH)BL6JB4nCwnUEY%KLUyYMd5r+#Jd0)xc^gtYR;92SDi3CVlY(05Ofdl4Ocfmh!J>p&Q5B-jQw*F23J$`3~Nb&R@72tw(fxI*h(HZ{S zgf$d`^V)23!`lpi;D5spZk}!)c&`?kl`6-;eR7@g1z1v)NP|&L_Vp8EEiZEBVJ8S@zYz4|$Gqkoil+uGVJ(QrB32mLyf8v219Aeb9_J32bd z_qY9}uBCF;^7^FckOa$*j<8j!_`mx3FJ#Y;os<>D6v@G{KYC|eY;4?WcGS_q9F{VE znYwLL(~>Qlo0e>@E6&O+F3HL)Qto4ZU0v$`eE$cQ`gTXh+bXW&cbH~&-R%1Qd+D1S zH*U=9kTSmiUK!Wb)iwC#;2>Lb$<94vSjDPCn7vz@*vsSJ?qJMdjP$$Mbf3U2(>EXO*_#-h85dXP?VnPvUVH2@ zOQE9Jolq2FbGJ?G1^udlr;GA>$ohelE`2tbLoL}LV6S{`0Ts`_!uV{BqX zoM&cgNkL!Xj(Kx7=SIy)eQNXOR^}9%9q$pHUYcCEsvu=qQG8up{6UPg9!K+6vOD6# z0h#U!Y9)~~-ei!RV9Gyb7=ju^oZ_eT9h!HN1!ZQ2RrJ*ZzhaB%X$Wa{@|G^LJFABN zF7<(8$^bh&=ZQow464^~81eiQR{_?j0m28GYP9BfrM{wVvu1BEF4{h8_O{}_iD}`u z4KOhyJbc;&_TKK!!$n1hJH>z0e_Y6^4GF2uxsWp>G;{{p6*Gpumrp9EP|F98+->~$ zAQofi5VUcmM?SL1UOb2cKZ5dXoG#8ne$^1a2tX(9O;0pM1ZCGo7cEVk+EkiQmlq<( znW_)9Y<^_=f`RSJPcNOCy7%t`uJ@-l@2h^hptUG4uQNBZGe01$G^8PAZPoJUmbPDQ zE86k$0DZ^hgWp)%Ig$Q&n!nrV{**K2`FB$h7UyKlXp)N zCEwfyvhqtyiMv|6D`|^wzUA-o#jKUbXRm&xJ-y>ZUEPV583WT=3JRN3Qd)`%no}Q0 zY2IHu?H?EN)D_*@nu*=VnStjc%_h7`4wd^XC(lc6Oj%vE{Fx=~mzoQE9;&a|@z>{?K;IV7_d4bjK$7KeYb$xV z*c+uDeX%gPU{`+>Dh}^K5-2Krqq9yEbC!H47pa852l=lkT=(pvIsN;audKFvcFdMV zki2h+bHn z-Iy}4tbfhi!>uWi3y&^6b!zR|`5~EIC+F54Tb0|N+EQ53k{p=7C{21GYI=NPWq3$Q zZ2f`hp*8EPVw-AW!&`P!I0m<9LD4RguUBF zc|5{JSQvUCh`fpsI?_rv$qP!6nf%URAA6#-HlTg+toHbUC9myS|LpwWyt;Vx>ak{R~JnkkhniHowE9&M;Ox;Sx3LsOFazoco7gL!?*0m!)BbEY1NBpf0Dm1v9)l==qL z%j~bpft&5}Q;Yf#Sd$PN`J4PWXmp3Y;CR{BWn(aWSla1aE0Xr%72YD)1+56ijSO+g zkEbp_xnTauwrQztC+07BU`6V{_|#b`s41N=VcM*ul$oi1(&>x6*Egl7Z@S)lvGkn9dy7xB5;G5}yt_!%8+hazgl+yW} zb6O0DxJS{a*Z}+_|6%ZgoHh8bHL~5RqQU<@4cAZ8>yCuv;6sN>e^FnJA+K*VyI3$C(HuuVXqrMepb zI%xTW1j%HuwhYQtj6kw)XSP$AVr!HQz>9i%!doOE2?@mbe`e48*o2P|zCHet`U~|} zzhy^M$h=y{p0m7Uxy-Jqc~Xgl!&RX@W&9dg(d!7cg6I*M={Qz#W{!#jvr$Z}2R)5M zA7ee(;0kv-Ko)sV%!L0G%-;IfmPqxxjc48r-GsZ2Ij>(;f3fM0-=%L<2cwvWy!xMm z&C1@v6Y78C-w((Qv;zlss9(rO{t5X&gjT81c;|H4EQ4F9OQB&QAfoFqwO=e}nQy3x z`_zwFdeuG^eJ|u@j_0WQTYMQD>K^)=Vvir)4NjLGTug=@L?tssr+%^_$pjqv9!$Jk zHPG|^#@=_=T*=!oy?T8vX!>s3b93fAw@r2!{BA~De0&>DRtfQ1gKPm_$|$-#PlarT z9+(3eTF(UtK9pa)wBo8%Fc_}vC87`$WT*!FoBTV-eKN9Ic6LEd2{B>g+?=3VeiF+s 
zc9D|-VgF&bK^}3mohzeTnNGol3^FtBM9^Lf>r_LK8V;jc#recWDQ3Wwf@(7ydp|%Mt2UYnnxl5 z*XEe}F7{aSh0fHxEwdAmKwzP@?a2u%=lpFz`cVD(t1MdC8#QO!j2YYKj`KMdKQ}pJ zUi_c~RFVo^0xr)PN>FBjZ*)UEiuNetUSVN&cG5)mjo70?*^6c;4uUVvUuG z`XRDBX{w!r3nd3k1ft!e0*6#!ZIj9W(4V9}we4{ib64hK(kt~zU%RL93Qf9rO)adM zwZ-LVi7PF+n+c2g&c=Mv&)|94#>mW8nIKWtGhNE;e#=8>NLojGL2ey9gpGAbZpC_n zU&b}~z6n|kkK1M1N{)6$2ed&XHHM65Y%EC@^cbngSX?ilMmH`$xybHeJ1(kD_gH>* z%IB(5(bo@*9rAwKX%?fpg2rJ7!7JdGcLP7|RHw@(SyFmPV0hsM9fovaLEdfI6VXrl z`|tF9L6tI-|3qhkCv`%yH#*pu?3f@-=vO-6{E>)HXl;!)nQTk5jsTpz*2p zRo7Yvl9uF7n;Xlj<&?oU+D_M&oLH4Nb4tuy;6sp=RS9~Z6`F{1--T6yFC*qeD@B~Z zi>v4|CCfN4Y?K~2BqTc`Ne6>TZx#d7bbesJ{Ih;(_wnPBM2I*tjMxAfOn?lA(wc>Y zh8n$`H%=zcMi+ILQCo0yCq#;PB*y#4D+yl9F7N5OzPBv5_r*1bUSIBX#XhsUZu+{a z;OM!VDmHYE!^4kgwLP&ssbv4V+iH4$cA#Ybg_g8M$q7pj&z}LiDIwnD%L8J?;Okgt z=bed;`*^MnH`j+1^RRMem?Qruw_mz$`H9qP>Cu*I=-4rdK~)1|?vSheJ0p3$WT+Di5!ooNtpeuPKaqEIr$vy6|FGZBxYL!ey{MU_HqPwe@u1abDzXc-@YRo1Lu- zHcV{B*kmfM9a*UpNNPg1T2gWheq2mP(;mxM+m#Tj7`3|lBCh9^Ti1WuS=FBJDh%D$N^bh zWbAnmXU!81chhrq#O$HMOB92jk}IbLSA$e;2x`nLw#Ez^{7m(n1Y z=(`xR#4eWX?0aN#DRkAsofvHk%O!)~N^e_MOFtR>78+b~JARz~0R}ifSriy>eE-_x zG`2J1nGl0aRN?>leN*6|(}Yf9`J$x_Evxzml)Y$pV^wsrArslLw=^uyh;)w~V`D4Z znVg*+%{_LKgDfbc)g3OT7^Y9lRLgG-UYExVz9-qL(^^@j`gH4YtnqhfzV)V;<&FL9 z3cIo&bS}hMXu?=jbwc;YY(4Ncd(U`>P)z=(aQ!*W6!zdpJok}!@*g)_DE^^)q@;bM z9kbD0y{6{yTivs}e|)&Q`tXmtyU*3up6j05eQw5#bKTJc%D8E(FLri5y(T?v_0yf5 zPpwWnnYDYxialA`dsnR3nZ=xiu9t`|bdDhF&(DA?J<2v<;*IfjH2C?AA$DT!WP9Pi zkU(*Oi5frr3?r4(#fZn2{@#CHbl>#muEPHCS-rE|H=M0f|B4ez{qghawvzjPelJT> zf821gC2`@k?JS88FB?{2^o`I_lr4!f1ZTQN#Mwp0+D#cdHpInc>eLuJlb`U-X%YB3 za5r+4)9C3QNnQAbKS{L{NS>!l3_B4@&0YRcaMgwxwHvB}C*yt0<&Sht%k6z-{rXq7 z<)(ICXq~$G>Rc4N+@q_*0^&0WKCEBPLG_a8gwxqF)r z&JI2{E`C+5n8PXv%fcvE5*`~7nZ}&9yynUdo z@lto$iq!Ze2kV#2y01AoX*p@PDahyD1eaXkSw&Dy>y(fX-*7(`(KEr<4mWuD=BItD z*(l=jF`ivk5(9aR7o0*y0rs?3rO7+eu_*&?icf6yn(72OBrKi3c$>KMdSi0igGY2ecY;k{9 zQU@2P{-pNW!cCAQyobv*ppc~5mv*ZsS>v3JI0%tq*sY#WaBEAn1Zo*iB+C)jqO0N&Fb6nA0?cb()u+QAAMz>YhkpE<>aTl1$g|c_F1AncW z#yTr(T@l^aN^*JNKggA8b7rgktYBtqY}Zz37xuFy54NQ=U4PEfEB)$tN_}GOHZt=Q z7VW5NJv*08z~jBLAyR!p`3pvc2LcZ_Ak*2~&(GG=&d%4()(#JDb7zEWc+5-O%i{On zDZ7MQ&bPe$J^hNaF*u`QZd9Fd%EXNL0OK_2H+Xy*Pk;9h3|_kibyvLWgHs(9e#e4l zY4+*H_qBW>9fuw%g#Yp(^vHOe*c2zl<%jfi zcf?8U-EWPPj_B!tvLf7;)s!%FFe;d}vy6vHaa3^MI1(XL$@$>EIqk7YU3J;5S^Zfn zPt2KrZe3o^>b}npc0b;hP~7{-{K6C4r}rE4J96i2%nrzF%-$!3cZF3Xpijl*;*^rr zH4%YjYpe4*s>1W4s#j#p++00nV%gS(*_D&StCM3(V#bl)A!%F0IZZwhBGUs;?;SIe2B>rd6VTX%R}m~qvU6} zrg=j724fmIAE{sB7?&|UC>V0Kw~-`%Dv+wtMsMW35@Sbb8|9m^7Y-dpd+SmEQaMvVo-bnj*X#Ef(q1z8g03?>eT zCxmJSLhxm``h*E%>l}jRm^s#E%oz6`1y9O2J3BbIy4rF^@kQcY{Aj%}md-7*%&a4t zy2qYBAd684uhFQtaZTf#EoA(cd{*8h59s~vG}g`q-Zq78v@BIMojkYdX_5W0WtZ79 zuBMmAb_+F)W3cHuNuXLXtfuE(Xn~eSy%TY9iLw_RmM2lY6SF8VjzmCzT@amJI1h^Fqz7hCnY!EH)~1#!6osjE6TbrHI^OV7S#T-re{&(4Y?Zn zo5_PyCf;~P&D&(JjZu#F@C;36p$3F$X*Dj%+_;<2sI*;etL!)KzSTcOWOOY>Mk6C< zQe?C`Ov1Bw>>pe`{rYa)MDP>gFD-H&KszD8peSBG9z9+>+MroY;3$H9Mt=%~)BkhB zor@@HG|1EOmrB&PUuRkD6SddL(Zx-6bd>8?Cnv4wYhv#Vs9$YqL&qGJkN5Znr2Hmu zLt&`q^_;z~sA|*9NIGs?XC>7{kRG+izv$!_Q8AnV)PNe_Fk9VzC^yaMqCMz*D(^qoDl{iO}&$w~!W5+r>IoR5|noM4H zV^EW?AK?7_CT%dhSHTH>Qvf3~>iszC95)3#@`xqlKb__36YQ_DSfdd~$f)z2I_W>5 zl#ieqT152XHjOd&XtdM0r6~~2(ilmd_54!GAAWf7pSPZ2cP4_Qn9fQo|A}9SvteXA z11hko3oMQA$e#!lvZ3FD!q_;CWm*$(2DdDcV_=64AFk+E(k!1*=kc{{yEs7QZp$T+ zU>bFZDtHsqV00fbYuNH))))~hBT{Lo@38iAq7;X_e;^;zI^W6YCtD)fjht}Zayqm{ z6k*he3{jlF8taq?QRhIm24hj<>?a2Vq>LeL0_Ngh)=Mw9xU0hn(SOqy*;sdY@bz7V zaSiv?)g5Szn}6}E`*#2KcvaTghnq4Q@&n_S9ADi2_WmCtMpZwoleMxO+ zKG+h}ePwe_=EnXtDffT%c-{2VpWZ)V$GiKA`l1&fYV=?Eg^TFC%d~YG3&RB|1gd(V 
zP(pOfN+>NoihzMNcW?J-Pmf{-Fz6Z4z1f37i;pzK9*L^|}Gb(J({)VNyqm#YrcR82s`_Zc2)vxU@E8g|m`W^9$_sqy^O7dwsT$RwUXGU+; zJ&O}!JPtaAOn?P24<~dpXyp+z^ceNBx1T(!R)(C6;iMRLuqdt?KCG!~Mk?I&_P&y` zJuj@-0_x;_mhhs(9mQ*{R7E9wFF2r_D%vn7{+)O7gMv`d*e)kAwRkBDvAn>1w%^J1Nnbl_i?Vl> zn%u==)K}7$^kmJucV_6MvTX~qtAnHJ^D<{g(>Z62AG<^A(W*My2R29eHAh77Hy?3< zfH(Km-mVeQ&Kj3J#=QRCQ2&@7#Gi ztFZ4atMJh#zNQUKJj9czBeJO{%MQ2NxZq5*X*pCjcyIZ!N z@S+uRYnz8|aTxKBG>|$s&L+Y$wh6~Ew0iR7T0^6 z91lA=XKs37{R`_~+MMm;c*xm4V)nL~FU{49p}>j>syX(mJhOi_?F{kEi=qYP_+GY^`fIQW*l8q zy^#KMsXA$PZ0zi$>cm-bakH4ssMIm`8KQ;?d;ILy&?y zW!)(RCtb00Nc#~iqPQDHi$pYTt)-WpKbF~$kldI#hzgR0kF-?X+mJZ>=uwQDfr`$P zsOY4dL|&P8J_ZAx)wXdCpA3%uWGcn|xL>cuan8Wls$ISUuFgAL6T6(!JMxKF@pO7aYHthPmtFNzYaL{;-8~b#=qE{QW_p&xd;=|FN zhfg+w(R5Tvt{(P_cH{(QhlZw4D##7X50ye?RNK{dBo*||nYXhfx#NO7p*MNeoUWwO zprFht=_USgzENSb)AQF9m#r@-+q)of>A6lcM`5UOH%J$Fg|NeOsO;pG$h@u>mJB58 zf_nA>o2RDA?sL$>Z&*|J;^w+)+YTx z9V_nhh}xymf7ciwoi5u^Q?sL@Y)5VFjG6HrMY^bX0DJbZvtn4Z5 z^NSBm%=AkLOw9OINn>nmV@d9!goH&&0Wm&4F#(gN`uI#`Thr@fW9!ok7RAOcs+tht zoe(`f(mRP}Up(}^bQp7q#F~ut@v#eF5n={N$|7FIrC%%b)GCW{Zj7od#?pvP*87eP z8&U$(r-Y{YPe_l5$PQ#t(zK?Wnyyr=N&U9`xYma{oYus(PCuAfI5{*gWJ+exq&V-0 z$eBqwtINwamElHU?6Na$$)IXKZhJc1;kKuPrB}P{d0NSo+HfyRrr0(1udVBtF{pi% zy{Ney6yCU>YH_*5ShIOVd#^<6HifLQi3eZ&?RSm&d5ah4S9p5xbC|f}RpSCXblI%QQJ5g$P%Z%wf{7basgtknq!mrzt;W zSsKwV{nYZZbkMRwk6)=?c(eS^{GRCqwODb%J=3u`+2^7-JvP(3e!i(QFlgL3r_8LZ z$m(h*Cu4DVZ6L1Pp`h;$vrjFahJ)W|MlJc4(JE@Q5qdY(}fjEV%LSX5DnEMo8h z*4lCA=^tFb{QAbt*IUy&PR-x+qvic+D~gMjPfJ@-RJbB7c%Dz@q?CXswq99OuMnX+H~+BorDdzfjt}Cqe@h|xq@1sXuAlMRE_AZO~tI%&f1bE-XEixTkq#e z*3Q~SSJZ{Xy7ETaO~;zk9qiLvjxT9A)tWRgF{h;@V_r_UqkU-3f=u2>JHGsx=FHl| z9qjM!n}2??wrJ<|wIyq=?&8d$u$BRhB1$vkPi<&Ty>Nxqgpk$w3r z2>+9j-ZO|apmTkY4bmgx=#x4fZLBSKpx&tncSI>3520uZ(Mh^P=V3H5X_DB|u00v) z+gjA;uS;Vd{7pyAgPr4QJRW|b++n=;8{@rA8LOo3Q>mTP%eykDlg0e2_tqb5^!9OW zL)YZ(Qz~-2ynO2Gyth=W$|AcHv>L~QRuK!Ih<7e*Y$b|^BOFfc1BqOce4>`f#W6I( zpYhdHOOxRS;y7xG<=P4MI6|S)dCP2VCdy5HCf=EjCVR7u zgAqY}31eU$)LDu9v)UZA5io~|mMZm2`!!NSm?i#PrE%~%!jCz~ z-=NRC&MA#cGz&I*7X0AGC~%oa%p>0N+Y=tM*{%dNxkbvN9!{ol>Lkm+arPvpVYuJ2 z9I^vlq)db^qFg>Iw|u;vBKq67wrunv}G> z@}qusWN}}|w9z?3}glT1yslGF)e9iQN z?jp10#be>qlT&BK>_Iv5rlRVeno!6D)fZ7rMO2fc0(OkSBs(}T#3_x`FonAmRDuSE zQl%vI2VzvNh0@ZMFS3u2lA{xwaTG`Kzn5LoT z+wNK8X7oIyJIEtCr=AYan$R-IN7=t6wR?eY^|YnwYamTqm5-(0n|9+Qbv2B=Jl56D zFotRXH~e<$FUH~f!!2rrjT;;s>nRl+FS@sW-o3@ga%(E{EVO5RaYuAx`G*ZuG*dJ}xc}gd9Rj-U$AO0X#KBh>YxD%@~5^5s0%} zMS!|ECLt>&d{T5xXh}>&Zg!~uRP}GYk3VlbJT7{AOi)m4kb87>Y*1iq_8y8F&6OpV~??S_x|`T<&Vsy z-stOlhDoZ5=t;fuu3TX1<(Oc3n=@@}WYcE!2+?ryM}Y}d01HHuFkB0n{KjLCzRo78 zC-&`q%lHfR`_reH`AJn}66ji{{7L@8^d7KzL*I{a8Ecd2Z95txc;b!bKz#8Dt1$q^ zFSow<69_2Sk>c6d$6tT#G3Ly^SjoZ@yn`}>O_>XGCRN88|EhjJFu=?`sB-u6@bVi| z?-`aFn3almQ25vdrk`W%@u+^n9R{=WIEPF(S65#L2h(Ou$or>=kYX{s(MXtIbkD-r z=77M&A6Od6i$!ueBEByRD~y;rEk0yITv=FAcxZZB@Ps(yufFT+`)<%?S5i~&1B5qo27(5_FMVl4kFK_Qx*YIjR<7;X(dry6f z!fI5rcdPWexZ-YxCcWc+aVP18P>q(}_d1S0bgT4YPyAkb2L;Sd=ngO?)5XWv*ClZ5 zSQi&tAh)$eSHleQs!4v)Pl5g;To|iGuk94Een^FD>xTtQy1!uS+&S9{4(3*r=M-&b zF=^wy!c+atg)54J!g7O%kob7MjWT*92TwO=Pe;>q_n`QR!O`Q4ypN=`7u`m$qTk{l zF#ThQ;3i1N&~5ZdAg6Al^qV@sJi>oN-VOa`27dE|@d{w5+bI1e72mqRVGsx9zo`Js zX~N9AjS@_;bRW8n(r+>d1{!7~V&CJ@~q*PV8J{#eUP4|Jr@x$r;tp8saQ(~q2HpXgY0sy#KdtaV#y_n|8G?ylc# z%IFxC~L!WpYHT|g1Nlh(Dn9~ir$KSU)U#~m#9tcUGcr0KxVE(ArIYPxFNK!3 zY%iH}YIRO{xPcQ=MNUKBq;&K|^KzJ+t!MEPl1 zL4CD$KaQVaLRZlxxwrb&0}{&0a4J8%`{B3oP4t(7rS_nKdiL8PmG8b$o;Xpz%17~c zUN4}jc~M(K`$mM4%}kj%cn|xmB-?pvLs`YT{Qgxx+eS~l`NWsTsQZr{+s3YD%`Fa^ zIx`XRmX3k;xVm-u(e-m?g{!benZZdthnq@!A$_Q&hw>jUdpKDu7DaP5ZfQXD+>!bg 
z;dQ<4gC9Hrtc|U*tdv)(o7%4|uQ~S9bMe{wVs|7AZIVEpbaa~nR10DP@?)Eo&o8PKS$+_52v1x8}NO@bqjK%S6v5P$J zakbkUhGi}`--?$HR3)bFii=KaxVJKQ)$FMOnLK);J}*;kiwU<<7zxGj{Wk4d@Hdt- z*QA*iTqas}e!jke?(POUV4`Ai3gdX; z2Mf*@{1=ncI|Yiv#r21d=CmV)+v?_SD?FTCS(%+&bBpIu!Dt*DO;9mf9sCoDq}gVm2bglV~Qir0C@U>_%=568HqUDqkI!O zzM*Y_xAl&b&%6~46C$dPY>u{GgNojAM0#svumxrUghs#MB^#9^Ha7YVFdLgedu#Ec z9*tp!t?Hvnrg8?cLANUg_X!i??L*>`T;b6jsHQwp+PoDj-DjohX!=DWKm4^q2Q6I% zoyzwU`+fb1j`Wa<&Z5-D(y-vtmYnIU%Yv45y}G*fiFKI;n=UM9dwzRiO7s2CHfA5n z44T^dO2PgQ_cEJB_b;9rS+{d;-lj##F_rb<(!JLf6z{mYq6cNod3!$G|INNnE_SE- zPT1#4cS(nyS4-GZ+~akD{XYhtG(yA_#3Ns4L_t^l#R2NR73b&Zl;?<-5RZS;{y?SK z?v%+CTAfCgfqG7X5>rf4Q#e^1E3xm~ls)*o)R8_AR^DsUOqul#@`aBQZ_T_(H{#*LofxkN$Uqjz_ zU*5OxJJ^hV9A85X{yuaGvuwia7$yU{QV)LyJpWApX0=KRKBK$}fB#;c#pd915PfDQ z!$WVPGU0i&?l2f~tZz_``VJ(2H#%D0fP~AaNs7TPb#n>|8WRv;>w_nW6l`Kw+l~B! zG#81H@0nqvcxs6P$_u)}vxHQ;S{tEpPgA>D&KHH{T)ldA<}>&7zEYu{SoY=knvCjs zL8b8%Y7%qmr%qctr>1-1skYR{=em;GrLQec($~`dmlw2@s+BD)*fY3nHIKPyVAbUXOF!=0M_V zmz-?QaB#?IKGD?l(8~0Ipwf=g{3V4E_RdA?=azJo1R;3zU;iZqyRUld@brQmFRiXy zgO~6s4!(V#d(HmVY0*_n62H#hGCjR@AH)fLzdlD^vmCk{y&wI&yzpu*dYp)NYw5M1 zD6(Z>_9p(@2#J)L(_UzN%DDQKmFu2glR9n9<#n50T8>|2wUyMa%=7chUD=yBKW)MJ z#u-OmT^_pp)gv>OJky@jnw8VBvmmr!cY6+TY%Vwk0oOS;#$Qo9?7=fL9_Jv2fnS^+ zsAIgMpjGkS5)1{Y@xmCC3kJ>%-ppBhZei2$7KB1GT23sPe{OBgKyZ0y$?%hcE6aob z%NTjXAi212c;{e6!96dn!P|j53Z@@=YqxvVzSgvI?d`y6tp_-jYAtTLmm}H`g34p` z8;cIK?@YlfOO7Tq5JXU)(@85!_fQ;hNDSK~E_Xq7!>RPOPjyuMxFRY2!SaTUX%@Gv zHRl#%3VhK|EIFUKtd$u$N^=UfUv2N~-(FC#t-qrK|LqU0TwPk)T@?~qxvH$RyE0UA z@x(ey>?FtGJwx7g z?7!JFE@IEH{DIT5P5MggnZ6ZS4i4GNAHbgJN*kC`(IxhbTj83yB`YhY$jvwYVf?k_ zb8MPJ({r~z-`OrU%{$mM2gIgPE&1z9L9md41&U!bokC6($4=gFPutsBF6ha17i*sH zb!KpG=IRSgO(&P5{4aC)sU;2PS7(9Fl_hoSi{0#_ikIb=c2!JK7Z~l{yAdQmjkUdh zgsk3AvWhmBRlRuUa%pu;`ilFkyZc+v8A`j`b?jJ!7xvO*gFQHjy(BK9@>j~No5>v} z%N;6b5XkOa``X?TdVsp?(bl-ZUrBATo%8AW>FBxJYU}nyCxt)aR&n1OtHL+`cvoT3 z_Djuck9rqvnI607Kz(@C-j-JWc$u!FEd&L-eLEQ zKKSp3lQSMz@&3{L%LPe^d-7=WOi69)qs^o3|AlbU`~CA;OX}7XxYrv{6PrzPlh=}(p=g^RMnU09!6=+p_$Y0lAhL1R*C zGZX#P6|OPakq%*Fm`jF|awEw2Yp3|?y5Pamu-srtK5aG!9iAJ;p=zn?WsE7#@3b+ zn zH^vExQ_0g)D6!+IdbHPSoe>`%e{pSDR&P5`A!Q+J_ZxnyL!YT72kKD=Gne`ZPUL$`C zVH3AASK@RG9)9hU?!IXC2fO>H{zde6`1DgWcUbb~e)9D{KOR?4{T+^@xkLNe8JP8g zp*PUm0j>kjN201FDhll#A|lWrhZF~VyK(S@Hc;<OPEPCgO{*jIg@60gVhvMcV`$qdQ+G}?316&Lw>4BK*C$L1l5e1)w`(Z9f zxB5-;_lF*}8TRy}>4{l?OJs`rc2vwL2>C%vJoJ(Gr zW5)Y=*g+i|I$b1K-8l5Zrt-FT;D9UI2k=N0e&)rgU<~cghW@=}EVJv(BK{=F)CKvt zzZJpqExjTsJD4xGy|||=Y~JA(ORDr{?UJaJre5jSBr!6&+!>UkSR3^Fod2~*gf+nK=R3R{ymHq&3O;rOSIYFCQRojL3*?h8=M($R0zuum{7&JDzO zjpNOU@HVl!3~$G=D3%C+Q+7r?bD`DWycMdOy)2*{<$nX!XLw6Mxd2z|Xj}pMi02Kj zu~Zt-7r0tRL76N%RI}lG;2Lv9kpqBi4S~fC(|^AXcaBxEY!2rIxPn{Y4q4%(EH;}> zqE{&u*$17Jt`0r>G>44?Pu8e8%CF$j2f<4so~&gb4E=+A@F_+-*l4BjQLKHwIU7*8 z6-%5m+`!28&nsg`&h}O)yk#Juh`%~dZiVVI91>8t%EnfY9??c0u^F7Bxf+!~fc0AE zI`lE<*~}?J5{oDsy1=n96F97o`3zLv^bt_qh6DknjDQ;Y3{V9lp!$X~1r*BnbjpT4 z1k`*pQ6@%g9%|>5Nz4miI%NjT(`aj^wa3l@W#_>&T6;FmG-B;nT^f4Ap1&TtGzuYR+g}f%=H&4S!~7G-3*%W{-k04e3zL z%on)ET#=`hZZ;sGJW$H~gHU}~bK0NG z3%K;P+^VqpLq-Yrg$*%!WU+8lw^KD*<+nz_>mTwAbly=kO}2J%`H&T<0xt^;S433(DlL&{Do2u*aFUvwJcN92eu>h0Yz56Rh`Os+-jasPG?#0@v_} z2UaNxFgn*bCL?h~jMr>HfCn>`SQfx^jtqec)@W1#C*p~@yH&zPx}eX>-6Pn)i6daT zeK-th0xQhWyMPJTVa$fUp}-*yMVjDI4b#v7pmMqHq|wZHZKM`ZL=|a*%R2w@SNZpR z&0*Ff2y}iSU-M7^J!Yknig}KQUIgNp@1Q-3!0j#4;GiNEP&};H|&Hoe8i;zgkMy&>Si+plprSYzNbyzzbnJv>V=J8Q{F^j8(i~rHt=LWbTDF zF&WyI+tBKeE_vSy!!em1bxdar-*PP{)A?6zg%X&4sbMl3&QK3f!j%bJ8#xqw1V6Q$ zSO1Bsr2s=%raL+Z${v0u10F{}wIWJWjS?AXFsLYbOMV;=jKfwVuT3vwV)XVWufWxe 
zEOLvb*VOT^OJSSXeX5_lNs9OwFHxK+4lAD;5au7}iFsw={!?6S);aktoR`qN2)5pGwOsxT(Ged*3T~-}WqOWJNpZ?4cIpg{vF}Ws zHq8s|?Tt=mqLtz|AXYqWNGUFE!|L4);PSyBM5FtlAiNjD6FuBKOhEZFrFDY1!*<6R z$@Z~LCHc!z`=@oBUbO1+5?9yjj?NRMYZV)6!XxW*Yqyrk73TCMduOdUKR0*tr8X9R z+jRS-h|KwM(d7{-o%M;8XV$7V3G2e0C$>n&tzJ`0rpDI96fTXK9okf%HsiiIGq=x< zv^;t{JIM0Fj{lW;CRSKjUft|Xyix&eHN}~+@!veMNtE7cKO z_MJK}6z)aV%uCDfRsg9i#$qh$+nuy`KPz}9R7u+GD zJ1SRAI)FD=zg?M?cUx}(na^iwgVXy2Wmd=;Nk6S9 zq<$~Yz?NvyzhW)sJh3rxDa3QQ#X0@GP`USNtJsV4-cA7MulCbQuzi{bJr z7Xp{AC)QcMRX*Z&6vxGW&VH>PHaK2l@kYk34jtpxDL5)-c)t}J+7pK%7K}M&0NXJu z_1H;98}7x0Yux&p^IdiVPgh%^xJ|}2N~s1lo@D`-gjcFrkOnpMJS@C*X5v=_hdMu$ z%JONyjRlldgAGwZ*?i+?fD(4#`JsuBZ4K%XE3Tgcs+H>3Y99(f)E2nNhW#HauFnCL%{7m}b!MoU&ym>z%Bp#W-Un2$@k1TgnV||{#S+v3 zDA76R3Al3_%GIWA8fT!<7@amM!@gO**E<6@sRy(=6GO@ z6zjx#SUk~(c0sJ!$`8~XNGV)msak;S_1K|b5|le!LjBZwRGnco^0A``3UYQx{TJj+ z_C)E-)uBII#<8PB8+jZZR;Wi+lj6&1;m9>#!4IT{X+Q>(dJ?X71mL~E$K;qVshF5HG`j+Y>bS*SfKaPu)Kyfqi>(J(!zhAY`H8nA0i)#fESUEZuWQK$J@beSZKw%m%Jj*W%7!^ zJyr+oppar?x^)h|7;QOUI4)55o|vyI*9|&f_Npdy3@8U*I5xn=3jPJ&#uv~=Q2{~Q zY33$qbG99pC)&sG*hniAHfZJboU%!vO!xk-ar%uK45Vpjp3#Hop9rWF>nwTX#`Gzm zV)-oF4W9`;L-CzMRw&_Vk+lKihpdez?l+L{r&}8$2IQ<`I>S6fbcdXttkIZAcM~SE zHVz7|#sHVDy#=l$9oGfcqD6Nopp-|*c!WXFMSKw0E*Sm{Y~&r0!^t-mha(Ajo3D5R zAGMop;aru*kBkC=tEjG{eFe{t@0TlrHqTpDTzQ+XILC#RTr5^Hc-$xnuU5^{F}~s) z8~FbZ^-%hzY!~rmu5a! zR`OU~qBhIDd<9qvd@?;&2P;&5fzJ|oN-oz`e3mQ$J!*7M{#IOlhNA)(`HFh1ZZs}f z>~o1KmJCd~#a@Lx?Mkkj1yyU=Y3xBjrQkGmwaUeO#C2ZaaUDU`T2?ROItd{o;yP89 znZ;FT{rqd0Bw zPR=rlG)j~)SZ0N?Mb_hU(>iDg~x9w27>s_ToF^kEcKX@h?G*yLSSk){D{Xa*f?O)V-^Av=^lZpeID066ubGv zO1q$mVip3^8N-)CYx-ah=`jm|iF6NPBHgn~aLXUK4vxk}Sqy>e94mpHKv6S)AREqN zaBL=uZVGJYpnF7gGZ^T05IU}V1Z}jx1-=U`2ec7B58#`r!vUXS&!tS@yI?@>fa@{} zK=Gg#Tx$t+I_s3j;5|`>7*+7&1DU4`3tCSIkF2nzC3yz2LkFI?Oyjv{~d%E7Pq z9{Otc%QZ(E8jsf8bDNi8e0nhWpl{(@I9 z6+}{E(5u5`a+p&mb1`-L`uQo!I27Sg1Vw~{iB@UAl3O%C(PQpOb>KF# zp^wr~eDDQ7AyT<)^s(2u1SwvqD`~aL1~isYoDfiSkf1&`nM)9d(&K-KS&xeB_ZjZz z`;AdMQ+|L#Ij;E;mE};AIQ1M#w`e#n)Atxt;5y5e3tXNy!#)Mcy!;%Gun1hZ zY9nwF)MYEb1YIQi0@rE0otlVQ%5H$t&o+UJPI1Bo{hCFb;4r#1^EC{+S=?*dB50T1 z=GME3LfQKIR1E#q)MH}6v_pehhbk4|nh$-x*Gkpup>sw*^JYNdjs@q#XKXV;#d(a1 zF0WPM_X8y;(>IN(wrt!c6?*bI%8h(yS15iYnNnK)Yh3aMv0*dtb4?sb#R&Y$Nx5LE<4??zuk;sA)tKAB{3NWqb`JI6#c)RPkHgvrSk}_Akz> z^M*sHQvqF+%e7(?J)roQ|Kw1)hA;S-JeNDyx;uEMs1Z=zXe~zcOJsWgAXJ~>9sxzx z{w+{Mml*N9VJoLgA}Bni6vickkH}{!pqdR=_$*~ym0+yP%6sve6Q>N?fbWub`Iy{Z zuyXb`sBSi2Kv6FD2cRGk%lI1k+y=wx65}-^$3Ze7u?YYh=Ds6#uP>+T3r-iszzLh| z2%k+Kk?#}ZJK8>h57in(FqEI+rWHjQ1kB&{y>tc{EslwHZ$0N1&Q&+WPlxY<2T(sO zkyWvgdz>8cf`@qTjn|}!t|E7nOS!{6aqruX)!VL)tGn)Wn)3Vp#1Y!lT*EetHK5#q zE*+zxum)Xx4I*xXG5%i1be2^LOcW7pw@Q$}Dl7OLA5-?|43D4-Lx_}|Nh4wnj#&e3eI@v5% zlKS_SkLnB)h}tseaTBcFVQ|08h!=~st4};HcGofR(T!sym)n+dxb{-(C3EwM+m=%2 z2V zpw2SVc+g~VBiFmZ@-KRQ$O;8-3s+#szl-Bhm*ZAng;snvr%UBy_8}8X*kr1Lvg*a5 z`=MoLa_vbv9hERc#7`gkfVg-MMqfm-uAMDszvnY2RWBq4FnBpiapW@^6f*+7 z9Ex(r))_&rBznIJm~PUU3jVmM58>OecjVpt+c)rSlcC#&zRTl*x}MstDpb{r?-C4s z*Gav=Jz1P3SBDOd;4R?x3phT;M)^UEL6Wb3pGV!AEb-#^+FFP+s+;W>lvC7g#OPQt zK02T{))w`w{%!1C?SwmRxG28ujBo35C{7YuBXN?n8@BT`;wPML;R+~{L;-ci@C?Vr zPbFOv#W_eL3aAU<7ioW4ipHrlY}detRDw89zvC;?&4L9sG^ZS%*Dh62g7~}u#pg9f zy?}X%Z__CBZRn(Yz9ND6u0AXNY9JrsTWy4NF?WjV=-PBN6trm}mx~O*=zB0%Eu;4b zF&Z#&TMKbBP$9lcRL}^ZqMm=3q9FQrLo7qXmMfPFSM|(rgyVo!F1UqD`hqN6!d1y< zyRc`t{T)iL{G?ag{Kd&;T0x0H$EeQ?SkPiToNSa4LBY!j>EP$ zbmK`)d(TQ98lMuCQcA!*IKZ>>XIj@G!Vv0H9&32sv)~ z4}3gigSZV~wTj@A>ieBTah)fiC{xcz^aPZy^#m@GDT0DbRdX0O!05Zgk5|B?!>gkT z7?a_mj1I+@WJhEQt`4PZ(mAvTyY(?s2BKUc4Mf#N_2<|Lj+Yc?Gr?t8;Z6Wst0@a` z;RwAF96H3nuX_-2%9sKEi*0Tq?Uim$T8ql!ia%^$}W<3Ji$)RthCrn-7!K<9- 
zPYs2jlFk{O2PEVo89Pe|(H zFW`+f{Bt<}dm&4I{yqg)c%gV3I2}gBtG+qKcd7Y%0r01 zE7$qov9<6SP&^)Lq8KJV^B4>Ndm8_og1`Taf1XC4@45<>j$HO|?K z@KV30oso_6=ftq}^11xEi84zsYfxcB{{obh88!mypt?~0n&Y~zLFIE?5=HYbT5-LK zwJbGJoR!uR@*Mgnl8doe#^Y9)1GsDaPrlyoaG0kiSywv=s(W~~Pl7v{{9ZGBQ*e*x zO%QE2QQicfPpS*#FZtggkNE75y?c#+p2I(Dcdc+04d)fO`zO%0f@kl9oHY+^f}D}& z4A!NL@NoJ#oS?6Hjev^NsUs*(4d(}_p*_iR1V1Q=qw`kGmH0up2r6KRb7TUL=^R;y z2#~jFBA|XIpro3iKkyZpFe0`?(w@(2q#@nF^#j%0)P+Tf=253-I9xY7Aaceg*qI`7 zfcO&6BA_!-GR0%XtG>3Kc-I&0NknrS#AH#=_J4d2QhsJa**!~IFK)=r?|r`gp-2! zrlR!b+{lv51h0S@E%(&UKG2ky)P8<>NlU(e(o%5#mR)<-y9QZr`6qn0MRNH7pQz3c z@{!L!gTF?jIR>kM-4FhDvkj=Mz;2;Pm2R<=VNboos|0?>q0Sion{!v9c7N6?f&Zwf z_!@Cqutt~@Ut9heYm3iBn}Lt=TQLfY7HbHx86E}Sx=lZg)Ni-yC&J43D9`Joj?`DY zwv6kuVE*^1kR-|vS@qW~`pUrTDfy1fM#-74HdW$e8h?THc!1M-dZcY<3w2iOt#F1qwZtZq*IdAoz^d0P2lzOv)e_qnUcCkDl74SQ z76}}#R4bGg?CBuGZwRl{!cL;54)$6$bdnx7oCR<7Nu>->UujS|LstkYBg9~6q`?W- z8sHjnQw_L2;kZ~BBv8jyuU3q_zXGV~8kFq_D85rT->{#7J4&68>LKb&^CJIz%ejqA ztHGSW&$wT6{+wa|;wx+PgNLM_q~Nwm;b+K~8q|EgW*kaCLvF?Ghcz7k|HXX(%!A{) zi<^YvcENwVVKfBIYo5jVB-WGX|C9>wX$R+%*cbobR6x%|U%auVUqc3Gaqcx65LM$g zZ#I+wFL2^=uo$+{XG$e#IK^q;pN(%qdhi*t+l=~Z#=n<2%2pO@NcZ-{8a6W(YdGeL z$ryn}?A2vltRhv;iB)W7jaWr1oUr-1Y@?o_#SEWW$Tr5kf(lsLGd0lO!+!cK(EFtE zb-X0l%U53|*+thrW zgiYOUc#^M^qYu33EO)->rU9rh6{#=ps~582iy@I?kFVfE$+enLF1aO zhA#>>%rJFJVI#iCJ<#v@Oq&~NugFpaj^tq(f=rrXIJQqTY#(UYrlKBW7+W87;U+#SYOahLci{vAFWvMr z>-5b;wSrlvkB4#uhe^t9Jal}`z?ZM%G*g6X~c?Am7rKLezS4p zicQp4Ouy?mV#Rp;PX zyve$w>md`kX#_Z7bw7zb0%-_rMJ^3iIAPI{whhsCem|RzJ*`kia|5xMcD}ITY3B=U zk1M@==d%bTEcBf(a7F64&Kvgg+at<&;L@Z0;sy}yLBfTc%xtRUWGTSJ)iI4nm_9MT zYsCZ|vKKeM$OCi(DlYm~x|FB54&k_@s+puiBo+;1x(*qFT?20hSv)aEX}aM)R>-lD zMp|N(qoJ$NA>VQxBCt)L`~TE-^}$h9SNy&E?%NGX2;oDL7*hxV5)2q1fP}9^Tq1!Y zNElE+B1)(=QV~V`Xc48Uv*v%2KML)Pmey)3BTfqxt=fts)oEu$ zG5fZ^bIyBr-v(6v>0~A|Z};r)o_pW9=iYPg`Jma1tSUfDOF8j&cl4A&YeYOkyU> zK3^Q=>k#CcU|m2`Qt*sRC9S$rcR#yWo9q+*8zy-PtTfUgD@2Eo)D%3UCX3WOyEq-E zF2AGQ{ijiqpD`joPBY7o?Lwg%`S}^XI~KVYRKNp2ac2&yJ>snsl{umA@Z$E}EZc&W zaBf7t9>{Ezx!xGa?DtT)F&&))P3K(Bb3pZ)0~@ePX0tU0D~;zRc22;9GgZbMSnjp! 
zi9A>2ycm_tAcI7Qi`lN!mY`jUB6tD45WKkC-)^AwZw%;6G#T#;d6g8jIA$KNl4H=S znJa`foyU!_AmQ)XS~m zP$cg;p!+`b*D;)E1m5!ry-?;lxz>f+74Lc4=?Y$4pWvM?jIYp6SA6kl)UM)IV`#){dq z1Z&urVa2RNf>m)Q7U^W+EADF6$*L4sMkh00k()pze8u%cI2VIlmji28AlJedt?7cb z*ZLc;>FOdF{bo&vk|A>$#0LetOJm9s#=Rp}HNJi=uY1uNCj7_08>E`PP z4PObpMI>kxS`58OWR%y>vt&%?aZTvwaSj5>$Ysg6Ex;QL|Dz{ZGOiXnw5lyd>xocRqHjnFVYHce_gPg2<~6d;*@O53VcFR$GpXQ6|L{^{o_6=3;y(=s)23_ zs3Zmw8a!eF3%(ifBw<=d3b#-Qi+_Lzp0_&&IUD- zGS6kUjGFdA%hE7XQ~rM%R*1F1?KFB!b*~54A@61shi__~HYMOFNo1*?a8Qp;&fIUm zO3Ci_P~EoV55c@jCFu?gr|v!m$_fD^f;`zl8M!cqGuzAUwvSU1ySAPtJ;j>_49o%8 zY2(3%R4(gpAR7j(^A4+`dXX+dv1-2+7pKU6*(@VJT%%X3}#X*w3 zo>1X8{VI*WP94I(QMmRA+81`{N3acHDZHP`}s@Ayu1H2{w<_- z6tdp12MyjPs*~0s5r=E*Y$q?T56-F-jT~j*D5x1gy=Vx^wf9jpUL)-l_KLT?3HF=s zcx_(WJNBF21m_;^(Cf&vT9qtSHOZINarKvEE033d;~Q=(Tl;NEg2i-Omkb<;G(csJ zQzngNpZxf*-X43+;nSzzL=ZZXOp`o%x7j~PevmwB|HQjN4OfFvr zQ|bfDKMTV;`=Lr8-f0z=bhMJUc}Eu{M?4;aZ`ANZ)4iNhJB(@R^U*sn^)!VJ+Zof{dL< zdA~h{&imXGpu-6Y`0vyMv3ntlhYlkjqz*7$6UMPVS_wK>#7sAt&ktQb;NYW8rW-}J zKjSs{bc*p5A8{DG6m*mQ0pp2{+Ns`UJjJH0!Sm>j;WZaGk-o*-sSd&ii7a+9c^@_W ziFHIfg=)Xu?TGf^6J=0TfI(y)?in{(g*U>2=#%^byo7bSRi!%xsHGSV{Ag;v2WE# z@HHic29MtbA<1RNx^pj%_}>5Gu^rkL+{h zegZUj!7$a2Ao0+_V=+;U-A!YREE1rSd7sc-!doe4kuZJO>?J6R1n6YmC!D!hHDL`= z#ow-!vBIvvnOHc>LP8&eWWc(bUP*N3OsvV<5zdSN8Mu$~ce4#7C(1ol&(M$hCKUgoR1^QjsxTIexM4tzbWf`lVPDaUo z#eM{%x7T`F?AJo9O~!sL#oF|o`wvhp1r;o@lG9`#5KVez!U!jMgas*~-<1*W%bLfG za4BXi`ML@gX|j1|rJD}vJjr-dnX`oZ5!2}+xT=I^MJ1&35aTWL=^D>Uw*>r=M|BVz z9cWO0)*Ak<0)J049`+zC#n&-jFFFq%hwcLRFnOHUgU-I$%Yp>RnWAM4Fj71SBYd-4 zjL0-tSGafiRqBX$6e|5;j43KKYDBC}TdYUgo_wBin0`OF*-G?w z(*Hb*>5n5?qM7MW!Kfqra;8VRFMTC{e$vO^&7Ysb=Ud$K(Js>YGkvXh1e`OUh?t2G zpLo6>{Gom?o}pR&=g0XOacOvlz3seB&lDpd-*Y5U#D?vQXKZrqEBJfZJpT6-BNd?- zDS38p=jYFYXaDz~@-xQ+A$BH0@j(9XQJf9WpYSvGD;(1jp|}@(pND>3z~7(b=W;xc z=V#n+U|gTViheG!y^Qk((H4As!1IPi=W(J^7!AybaP)E}RV&40DA?|-nW>tPm&HavO!rNxOAWIO&h`9@7e_zCa>G zqRu7}F>a+#(5eng2uG>XxT?AQeQ$}$kPz5=v*9cDhTaX%W>_;(OLpK8Td5f{&a0g; zp>FJjIqUl6oVO9dp|4cF@+ZWGk`*Y9=2Uiq>xggxvGdCO@(^_BQ6AKXu-)N-8i#sR`bklN4wSNTPx>Js$Nhve9n})8y1ZjG4GB>B<>$QtE%x^wM#ZNRt%kY z@5-4AhL3Knsh&Ho(7TLl0cu1kw&~At+aekJ`@wSVOL-qrKcZhA%7{?lYyPWx672z#7RY~)LN8S9bl{!Eq}0S<*mE(`XhG3>2F(T3PtS9p8S zEA%lV5-n!8^X$`w*h`!9z1~#oF*c+L^x!k7SUY~ioZMiyfQoLOhk%TyGV4dyqO5zM zjj_ta;8*lD@Zx7E#wslKYlr=9?yr*vUx1pRCw^{VP>J>0;rqm@so>I#%46P@8F&31 z$R$Pf0(HI$)eF2DG2h{v{tjqs8etoTMH= 3600: - hours += 1 - time_stamp_in_seconds -= 3600 - # get minutes - mins = 0 - while time_stamp_in_seconds >= 60: - mins += 1 - time_stamp_in_seconds -= 60 - time_hours = f"{int(hours):02d}" - time_mins = f"{int(mins):02d}" - time_secs = f"{time_stamp_in_seconds:05.02f}" - fi_time_stamp = time_hours + ":" + time_mins + ":" + time_secs - - return fi_time_stamp - - -def get_timestamp_for_uniform_frame_extraction(num_frames, frame_id, duration): - """ - function: get the timestamp of a frame, 在均匀抽帧时用。 - - num_frames: 总帧数 - frameid_list: 被抽帧的帧的索引 - duration: 视频的总时长 - return: timestamp; xx:xx:xx (str) - """ - time_stamp = duration * 1.0 * frame_id / num_frames - - return time_stamp - - -def render_frame_timestamp(frame, timestamp, font_rate=0.1): - """ - 函数功能, 给frame, 按照顺序将 index 渲染上去 - 逻辑思路: 把index渲染到图片的左上方 - - frame: 帧,PIL.Image object - timestamp: 时间戳,单位是秒 - font_rate: 字体大小占 min(wi, hei)的比率 - """ - - time_stamp = "time: " + timestamp_converting(timestamp) - new_frame = render_single_image_with_timestamp(frame, time_stamp, font_rate) - - return new_frame diff --git a/fastdeploy/input/mm_processor/utils/video_utils.py b/fastdeploy/input/mm_processor/utils/video_utils.py deleted file mode 100644 
index a4769ca8ec..0000000000 --- a/fastdeploy/input/mm_processor/utils/video_utils.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import io -import os -from tempfile import NamedTemporaryFile as ntf - -import decord - -try: - # moviepy 1.0 - import moviepy.editor as mp -except: - # moviepy 2.0 - import moviepy as mp - - -def is_gif(data: bytes) -> bool: - """ - check if a bytes is a gif based on the magic head - """ - return data[:6] in (b"GIF87a", b"GIF89a") - - -class VideoReaderWrapper(decord.VideoReader): - """ - Solving memory leak bug - - https://github.com/dmlc/decord/issues/208 - """ - - def __init__(self, video_path, *args, **kwargs): - with ntf(delete=True, suffix=".gif") as gif_file: - gif_input = None - self.original_file = None - if isinstance(video_path, str): - self.original_file = video_path - if video_path.lower().endswith(".gif"): - gif_input = video_path - elif isinstance(video_path, bytes): - if is_gif(video_path): - gif_file.write(video_path) - gif_input = gif_file.name - elif isinstance(video_path, io.BytesIO): - video_path.seek(0) - tmp_bytes = video_path.read() - video_path.seek(0) - if is_gif(tmp_bytes): - gif_file.write(tmp_bytes) - gif_input = gif_file.name - - if gif_input is not None: - clip = mp.VideoFileClip(gif_input) - mp4_file = ntf(delete=False, suffix=".mp4") - clip.write_videofile(mp4_file.name, verbose=False, logger=None) - clip.close() - video_path = mp4_file.name - self.original_file = video_path - - super().__init__(video_path, *args, **kwargs) - self.seek(0) - - def __getitem__(self, key): - frames = super().__getitem__(key) - self.seek(0) - return frames - - def __del__(self): - if self.original_file and os.path.exists(self.original_file): - os.remove(self.original_file) From 00d2747731563d1fee99416c73c39fb7619825a3 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 14:07:43 +0800 Subject: [PATCH 03/16] add qwen25-vl processor --- fastdeploy/input/ernie_vl_processor.py | 26 +-- .../input/mm_processor/image_preprocessor.py | 155 +++++------------- fastdeploy/input/mm_processor/process.py | 77 +++------ 3 files changed, 70 insertions(+), 188 deletions(-) diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index d7ce66e932..b455cbe057 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -250,20 +250,20 @@ def append_generated_tokens(self, multimodal_inputs, generated_token_ids): def pack_outputs(self, outs): # Stack or nullify image-related fields - # if not outs["images"]: - # outs["images"] = None - # outs["grid_thw"] = None - # outs["image_type_ids"] = None - # else: - # outs["images"] = np.vstack(outs["images"]) - # outs["grid_thw"] = np.vstack(outs["grid_thw"]) - # outs["image_type_ids"] = np.array(outs["image_type_ids"]) + if not outs["images"]: + outs["images"] = None + outs["grid_thw"] = None + outs["image_type_ids"] = 
None + else: + outs["images"] = np.vstack(outs["images"]) + outs["grid_thw"] = np.vstack(outs["grid_thw"]) + outs["image_type_ids"] = np.array(outs["image_type_ids"]) - outs["images"] = np.array(outs["images"]) - outs["grid_thw"] = np.array(outs["grid_thw"]) - # outs["pixel_values_videos"] = np.array(outs["pixel_values_videos"]) - # outs["video_grid_thw"] = np.array(outs["video_grid_thw"]) - outs["image_type_ids"] = np.array(outs["image_type_ids"]) + # outs["images"] = np.array(outs["images"]) + # outs["grid_thw"] = np.array(outs["grid_thw"]) + # # outs["pixel_values_videos"] = np.array(outs["pixel_values_videos"]) + # # outs["video_grid_thw"] = np.array(outs["video_grid_thw"]) + # outs["image_type_ids"] = np.array(outs["image_type_ids"]) outs["image_patch_id"] = self.image_patch_id # Convert lists to arrays diff --git a/fastdeploy/input/mm_processor/image_preprocessor.py b/fastdeploy/input/mm_processor/image_preprocessor.py index 6508755926..c3a02d24dd 100644 --- a/fastdeploy/input/mm_processor/image_preprocessor.py +++ b/fastdeploy/input/mm_processor/image_preprocessor.py @@ -25,7 +25,6 @@ from paddleformers.transformers.feature_extraction_utils import BatchFeature from paddleformers.transformers.image_processing_utils import BaseImageProcessor from paddleformers.transformers.image_transforms import ( - convert_to_rgb, normalize, rescale, resize, @@ -148,47 +147,6 @@ def is_scaled_image(image: np.ndarray) -> bool: return np.min(image) >= 0 and np.max(image) <= 1 -def make_batched_images(images) -> List[List[ImageInput]]: - """ - Accepts images in list or nested list format, and makes a list of images for preprocessing. - - Args: - images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`): - The input image. - - Returns: - list: A list of images. - """ - if isinstance(images, (list, tuple)) and isinstance(images[0], (list, tuple)) and is_valid_image(images[0][0]): - return [img for img_list in images for img in img_list] - - elif isinstance(images, (list, tuple)) and is_valid_image(images[0]): - return images - - elif is_valid_image(images): - return [images] - - raise ValueError(f"Could not make batched images from {images}") - - -# Copied from transformers.models.llava_next_video.image_processing_llava_next_video.make_batched_videos -def make_batched_videos(videos) -> List[VideoInput]: - """dummy""" - if isinstance(videos, (list, tuple)) and isinstance(videos[0], (list, tuple)) and is_valid_image(videos[0][0]): - return videos - - elif isinstance(videos, (list, tuple)) and is_valid_image(videos[0]): - if isinstance(videos[0], Image.Image): - return [videos] - elif len(videos[0].shape) == 4: - return [list(video) for video in videos] - - elif is_valid_image(videos) and len(videos.shape) == 4: - return [list(videos)] - - raise ValueError(f"Could not make batched video from {videos}") - - class ImageProcessor(BaseImageProcessor): r""" Constructs a adaptive image processor that dynamically resizes images based on the original images. @@ -207,8 +165,6 @@ class ImageProcessor(BaseImageProcessor): image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`): Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image. - do_convert_rgb (`bool`, *optional*, defaults to `True`): - Whether to convert the image to RGB. min_pixels (`int`, *optional*, defaults to `56 * 56`): The min pixels of the image to resize the image. 
max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`): @@ -236,7 +192,6 @@ def __init__( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = True, min_pixels: int = 56 * 56, max_pixels: int = 28 * 28 * 1280, patch_size: int = 14, @@ -258,7 +213,6 @@ def __init__( self.temporal_patch_size = temporal_patch_size self.merge_size = merge_size self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels} - self.do_convert_rgb = do_convert_rgb def set_pixels(self, min_pixels=None, max_pixels=None, msg=""): """设定pixels""" @@ -298,7 +252,6 @@ def _preprocess( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = False, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, ): @@ -325,8 +278,6 @@ def _preprocess( image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image. - do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): - Whether to convert the image to RGB. data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`): The channel dimension format for the output image. Can be one of: - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. @@ -341,9 +292,6 @@ def _preprocess( """ images = make_list_of_images(images) - if do_convert_rgb: - images = [convert_to_rgb(image) for image in images] - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] @@ -439,12 +387,12 @@ def _preprocess( ] ) - return flatten_patches, (grid_t, grid_h, grid_w) + return flatten_patches, np.array([grid_t, grid_h, grid_w]) def preprocess( self, - images: ImageInput = None, - videos: VideoInput = None, + image: ImageInput = None, + video: VideoInput = None, size: Optional[Union[int, List[int]]] = None, resample: PILImageResampling = None, do_rescale: bool = True, @@ -452,7 +400,6 @@ def preprocess( do_normalize: bool = True, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, - do_convert_rgb: bool = False, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, input_data_format: Optional[Union[str, ChannelDimension]] = None, @@ -482,8 +429,6 @@ def preprocess( image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to `True`. - do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): - Whether to convert the image to RGB. return_tensors (`str` or `TensorType`, *optional*): The type of tensors to return. Can be one of: - Unset: Return a list of `np.ndarray`. 
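
# A rough, self-contained sketch (not part of this patch) of how the min_pixels /
# max_pixels bounds documented above turn an input image into a patch grid and an LLM
# token count. It assumes the defaults used in this file (patch_size=14, merge_size=2,
# so factor=28) and an input small enough that neither pixel-budget clamp fires.
def example_token_count(height: int, width: int, patch_size: int = 14, merge_size: int = 2) -> int:
    factor = patch_size * merge_size
    # Round each side to the nearest multiple of `factor`, roughly what smart_resize
    # does before applying its min_pixels / max_pixels clamps.
    resized_h = round(height / factor) * factor
    resized_w = round(width / factor) * factor
    grid_h, grid_w = resized_h // patch_size, resized_w // patch_size
    # One LLM token per merge_size x merge_size block of vision patches.
    return (grid_h * grid_w) // (merge_size**2)


print(example_token_count(480, 640))  # 476x644 pixels -> 34x46 patches -> 391 tokens
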
@@ -509,65 +454,41 @@ def preprocess( do_normalize = do_normalize if do_normalize is not None else self.do_normalize image_mean = image_mean if image_mean is not None else self.image_mean image_std = image_std if image_std is not None else self.image_std - do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - if images is not None: - images = make_batched_images(images) - if videos is not None: - videos = make_batched_videos(videos) - - if images is not None and not valid_images(images): + if image is not None and not valid_images(image): raise ValueError("Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "paddle.Tensor.") + if video is not None and not valid_images(video): + raise ValueError("Invalid frame type. Must be of type PIL.Image.Image, numpy.ndarray, " "paddle.Tensor.") + + data = dict() + if image is not None: + pixel_values, image_grid_thw = self._preprocess( + image, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + input_data_format=input_data_format, + ) + data["pixel_values"] = pixel_values + data["image_grid_thw"] = image_grid_thw + + if video is not None: + pixel_values_videos, video_grid_thw = self._preprocess( + video, + resample=resample, + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + data_format=data_format, + input_data_format=input_data_format, + ) + data["pixel_values_videos"] = pixel_values_videos + data["video_grid_thw"] = video_grid_thw - if images is not None: - pixel_values, vision_grid_thws = [], [] - for image in images: - patches, image_grid_thw = self._preprocess( - image, - resample=resample, - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - data_format=data_format, - do_convert_rgb=do_convert_rgb, - input_data_format=input_data_format, - ) - pixel_values.extend(patches) - vision_grid_thws.append(image_grid_thw) - - pixel_values = np.array(pixel_values) - vision_grid_thws = np.array(vision_grid_thws) - data = { - "pixel_values": pixel_values, - "image_grid_thw": vision_grid_thws, - } - - if videos is not None: - pixel_values, vision_grid_thws = [], [] - for images in videos: - patches, video_grid_thw = self._preprocess( - images, - resample=resample, - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - data_format=data_format, - do_convert_rgb=do_convert_rgb, - input_data_format=input_data_format, - ) - pixel_values.extend(patches) - vision_grid_thws.append(video_grid_thw) - - pixel_values = np.array(pixel_values) - vision_grid_thws = np.array(vision_grid_thws) - - data = { - "pixel_values_videos": pixel_values, - "video_grid_thw": vision_grid_thws, - } - - return BatchFeature(data=data, tensor_type=return_tensors) \ No newline at end of file + return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index 69e612a449..613ac619d5 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -213,7 +213,7 @@ def request2ids( video_bytes = image_message.get("video") if video_bytes is None: continue - frames = self._load_and_process_video(video_bytes, image_message) + frames = 
self._load_and_process_video(video_bytes) # ----------- # mm_parser = MultiModalPartParser() # fimg = mm_parser.parse_image("file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg") @@ -231,6 +231,7 @@ def request2ids( def _add_text(self, tokens, outputs: Dict) -> None: if isinstance(tokens, str): tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] + outputs["input_ids"].extend(tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) @@ -240,85 +241,45 @@ def _add_text(self, tokens, outputs: Dict) -> None: outputs["cur_position"] += len(tokens) def _add_image(self, img, outputs: Dict) -> None: - patches_h, patches_w = self.image_preprocessor.get_smarted_resize( - img.height, - img.width, - min_pixels=self.image_min_pixels, - max_pixels=self.image_max_pixels, - )[1] - num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2) - - outputs["input_ids"].extend([self.image_token_id] * num_tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) - - pos_ids = self._compute_3d_positions(1, patches_h, patches_w, outputs["cur_position"]) - outputs["position_ids"].extend(pos_ids) - outputs["cur_position"] = np.max(pos_ids) + 1 - - # Preprocess pixels - # image_mean = [0.48145466, 0.4578275, 0.40821073] - # image_std = [0.26862954, 0.26130258, 0.27577711] - # do_rescale = True - # do_normalize = True ret = self.image_preprocessor.preprocess( - images=[img.convert("RGB")], - # do_normalize=do_normalize, - # image_mean=image_mean, - # image_std=image_std, - # do_rescale=do_rescale, - # predetermined_grid_thw=np.array([[patches_h, patches_w]]), - do_convert_rgb=True, + image=[img.convert("RGB")], input_data_format=ChannelDimension.LAST, ) + num_tokens = ret["image_grid_thw"].prod() // self.image_preprocessor.merge_size**2 + + outputs["input_ids"].extend([self.image_token_id] * num_tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) outputs["images"].append(ret["pixel_values"]) outputs["grid_thw"].append(ret["image_grid_thw"]) outputs["image_type_ids"].append(0) - def _add_video(self, frames, outputs: Dict) -> None: - patches_h, patches_w = self.image_preprocessor.get_smarted_resize( - frames[0].height, - frames[0].width, - min_pixels=self.video_min_pixels, - max_pixels=self.video_max_pixels, - )[1] - num_frames = len(frames) - num_tokens = (num_frames * patches_h * patches_w) // (self.spatial_conv_size**2 * self.temporal_conv_size) + pos_ids = self._compute_3d_positions(1, ret["image_grid_thw"][1], ret["image_grid_thw"][2], outputs["cur_position"]) + outputs["position_ids"].extend(pos_ids) + outputs["cur_position"] = np.max(pos_ids) + 1 + def _add_video(self, frames, outputs: Dict) -> None: pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) - - # rescale_factor = 1 / 255 - # image_mean = [0.48145466, 0.4578275, 0.40821073] - # image_std = [0.26862954, 0.26130258, 0.27577711] - # do_rescale = True - # do_normalize = True ret = self.image_preprocessor.preprocess( - images=None, - videos=pixel_stack, - # do_normalize=do_normalize, - # image_mean=image_mean, - # image_std=image_std, - # do_rescale=do_rescale, - # predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), - do_convert_rgb=True, + video=pixel_stack, input_data_format=ChannelDimension.LAST, ) + num_tokens = ret["video_grid_thw"].prod() // self.image_preprocessor.merge_size**2 + + outputs["input_ids"].extend([self.video_token_id] * num_tokens) + 
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) outputs["images"].append(ret["pixel_values_videos"]) outputs["grid_thw"].append(ret["video_grid_thw"]) # outputs["pixel_values_videos"].append(ret["pixel_values_videos"]) # outputs["video_grid_thw"].append(ret["video_grid_thw"]) - outputs["image_type_ids"].extend([1] * num_frames) - - # num_tokens = ret["video_grid_thw"] - outputs["input_ids"].extend([self.video_token_id] * num_tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) + outputs["image_type_ids"].extend([1] * ret["video_grid_thw"][0]) - pos_ids = self._compute_3d_positions(num_frames, patches_h, patches_w, outputs["cur_position"]) + pos_ids = self._compute_3d_positions(ret["video_grid_thw"][0], ret["video_grid_thw"][1], ret["video_grid_thw"][2], outputs["cur_position"]) outputs["position_ids"].extend(pos_ids) outputs["cur_position"] = np.max(pos_ids) + 1 - def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: + def _load_and_process_video(self, url: str) -> List[Image.Image]: reader, meta = read_video_decord(url) frames = [] From 50a444c114c0ec6cc19f976e761feb1949d64353 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 15:16:33 +0800 Subject: [PATCH 04/16] add qwen25-vl processor --- fastdeploy/input/ernie_vl_processor.py | 2 +- ...age_preprocessor.py => image_processor.py} | 201 +++++++----------- fastdeploy/input/mm_processor/process.py | 74 ++----- .../input/mm_processor/process_video.py | 2 - 4 files changed, 94 insertions(+), 185 deletions(-) rename fastdeploy/input/mm_processor/{image_preprocessor.py => image_processor.py} (79%) diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index b455cbe057..ae8abac30b 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -39,7 +39,7 @@ def __init__( processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) self.ernie_processor = DataProcessor( - tokenizer_name=tokenizer_path, + model_path=tokenizer_path, image_preprocessor_name=preprocessor_path, **processor_kwargs, ) diff --git a/fastdeploy/input/mm_processor/image_preprocessor.py b/fastdeploy/input/mm_processor/image_processor.py similarity index 79% rename from fastdeploy/input/mm_processor/image_preprocessor.py rename to fastdeploy/input/mm_processor/image_processor.py index c3a02d24dd..e7397bcaa2 100644 --- a/fastdeploy/input/mm_processor/image_preprocessor.py +++ b/fastdeploy/input/mm_processor/image_processor.py @@ -36,7 +36,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_valid_image, + make_list_of_images, to_numpy_array, valid_images, @@ -49,10 +49,8 @@ OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073] OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711] -IMAGE_FACTOR = 28 MIN_PIXELS = 4 * 28 * 28 MAX_PIXELS = 16384 * 28 * 28 -MAX_RATIO = 200 VideoInput = Union[ @@ -67,11 +65,6 @@ ] -__all__ = [ - "AdaptiveImageProcessor", -] - - def round_by_factor(number: int, factor: int) -> int: """Returns the closest integer to 'number' that is divisible by 'factor'.""" return round(number / factor) * factor @@ -90,9 +83,10 @@ def floor_by_factor(number: int, factor: int) -> int: def smart_resize( height: int, width: int, - factor: int = IMAGE_FACTOR, - min_pixels: int = MIN_PIXELS, - max_pixels: int = MAX_PIXELS, + factor: int, + min_pixels: int, + max_pixels: int, + max_ratio: int = 200 ): """ Rescales the image so that the following conditions are 
met: @@ -103,16 +97,16 @@ def smart_resize( 3. The aspect ratio of the image is maintained as closely as possible. """ - if max(height, width) / min(height, width) > MAX_RATIO: + if max(height, width) / min(height, width) > max_ratio: if height > width: new_width = max(factor, round_by_factor(width, factor)) - new_height = floor_by_factor(new_width * MAX_RATIO, factor) + new_height = floor_by_factor(new_width * max_ratio, factor) else: new_height = max(factor, round_by_factor(height, factor)) - new_width = floor_by_factor(new_height * MAX_RATIO, factor) + new_width = floor_by_factor(new_height * max_ratio, factor) data_processor_logger.info( - f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)},\ + f"absolute aspect ratio must be smaller than {max_ratio}, got {max(height, width) / min(height, width)},\ resize to {max(new_height, new_width) / min(new_height, new_width)}" ) @@ -177,83 +171,51 @@ class ImageProcessor(BaseImageProcessor): The merge size of the vision encoder to llm encoder. """ - model_input_names = [ - "pixel_values", - "image_grid_thw", - "pixel_values_videos", - "video_grid_thw", - ] - def __init__( self, - resample: PILImageResampling = PILImageResampling.BICUBIC, - do_rescale: bool = True, - rescale_factor: float = 1 / 255, - do_normalize: bool = True, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, - min_pixels: int = 56 * 56, - max_pixels: int = 28 * 28 * 1280, patch_size: int = 14, - temporal_patch_size: int = 2, merge_size: int = 2, + temporal_patch_size: int = 2, + min_pixels: int = MIN_PIXELS, + max_pixels: int = MAX_PIXELS, + image_mean: Union[float, List[float]] = OPENAI_CLIP_MEAN, + image_std: Union[float, List[float]] = OPENAI_CLIP_STD, + rescale_factor: float = 1 / 255, + do_rescale: bool = True, + do_normalize: bool = True, + resample: PILImageResampling = PILImageResampling.BICUBIC, **kwargs, ) -> None: """init""" super().__init__(**kwargs) - self.resample = resample - self.do_rescale = do_rescale - self.rescale_factor = rescale_factor - self.do_normalize = do_normalize - self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN - self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD - self.min_pixels = min_pixels - self.max_pixels = max_pixels self.patch_size = patch_size - self.temporal_patch_size = temporal_patch_size self.merge_size = merge_size - self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels} - - def set_pixels(self, min_pixels=None, max_pixels=None, msg=""): - """设定pixels""" - if min_pixels is not None: - assert isinstance(min_pixels, int) and min_pixels >= 0, "min_pixels must be positive int" - data_processor_logger.info(f"{msg} AdaptiveImageProcessor set min_pixels = {min_pixels}") - self.min_pixels = min_pixels - self.size["min_pixels"] = int(min_pixels) - if max_pixels is not None: - assert isinstance(max_pixels, int) and max_pixels > 0, "max_pixels must be positive int" - data_processor_logger.info(f"{msg} AdaptiveImageProcessor set max_pixels = {max_pixels}") - self.max_pixels = max_pixels - self.size["max_pixels"] = int(max_pixels) - - def get_smarted_resize(self, height, width, min_pixels=None, max_pixels=None): - """dummy""" - actual_min_pixels = min_pixels if min_pixels is not None else self.min_pixels - actual_max_pixels = max_pixels if max_pixels is not None else self.max_pixels - resized_height, resized_width = smart_resize( - height, - width, - factor=self.patch_size 
* self.merge_size, - min_pixels=actual_min_pixels, - max_pixels=actual_max_pixels, - ) - return (resized_height, resized_width), ( - resized_height // self.patch_size, - resized_width // self.patch_size, - ) + self.temporal_patch_size = temporal_patch_size + + self.min_pixels = min_pixels + self.max_pixels = max_pixels + + self.image_mean = image_mean + self.image_std = image_std + self.rescale_factor = rescale_factor + self.do_rescale = do_rescale + self.do_normalize = do_normalize + + self.resample = resample def _preprocess( self, images: Union[ImageInput, VideoInput], - resample: PILImageResampling = None, - do_rescale: bool = True, - rescale_factor: float = 1 / 255, - do_normalize: bool = True, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, - data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, - input_data_format: Optional[Union[str, ChannelDimension]] = None, + min_pixels: int, + max_pixels: int, + image_mean: Optional[Union[float, List[float]]], + image_std: Optional[Union[float, List[float]]], + rescale_factor: float, + do_rescale: bool, + do_normalize: bool, + resample: PILImageResampling, + data_format: Optional[ChannelDimension], + input_data_format: Optional[Union[str, ChannelDimension]], ): """ Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`. @@ -309,8 +271,8 @@ def _preprocess( height, width, factor=self.patch_size * self.merge_size, - min_pixels=self.min_pixels, - max_pixels=self.max_pixels, + min_pixels=min_pixels, + max_pixels=max_pixels, ) processed_images = [] @@ -391,18 +353,18 @@ def _preprocess( def preprocess( self, - image: ImageInput = None, - video: VideoInput = None, - size: Optional[Union[int, List[int]]] = None, - resample: PILImageResampling = None, - do_rescale: bool = True, - rescale_factor: float = 1 / 255, - do_normalize: bool = True, + images: Union[ImageInput, VideoInput], + min_pixels: Optional[int] = None, + max_pixels: Optional[int] = None, image_mean: Optional[Union[float, List[float]]] = None, image_std: Optional[Union[float, List[float]]] = None, + rescale_factor: Optional[float] = None, + do_rescale: Optional[bool] = None, + do_normalize: Optional[bool] = None, + resample: Optional[PILImageResampling] = None, return_tensors: Optional[Union[str, TensorType]] = None, data_format: Optional[ChannelDimension] = ChannelDimension.FIRST, - input_data_format: Optional[Union[str, ChannelDimension]] = None, + input_data_format: Optional[Union[str, ChannelDimension]] = ChannelDimension.LAST, ): """ Args: @@ -447,48 +409,33 @@ def preprocess( - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. 
""" - size = size if size is not None else self.size - resample = resample if resample is not None else self.resample - do_rescale = do_rescale if do_rescale is not None else self.do_rescale - rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor - do_normalize = do_normalize if do_normalize is not None else self.do_normalize + min_pixels = min_pixels if min_pixels is not None else self.min_pixels + max_pixels = max_pixels if max_pixels is not None else self.max_pixels image_mean = image_mean if image_mean is not None else self.image_mean image_std = image_std if image_std is not None else self.image_std + rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor + do_rescale = do_rescale if do_rescale is not None else self.do_rescale + do_normalize = do_normalize if do_normalize is not None else self.do_normalize + resample = resample if resample is not None else self.resample - if image is not None and not valid_images(image): + if images is not None and not valid_images(images): raise ValueError("Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " "paddle.Tensor.") - if video is not None and not valid_images(video): - raise ValueError("Invalid frame type. Must be of type PIL.Image.Image, numpy.ndarray, " "paddle.Tensor.") - - data = dict() - if image is not None: - pixel_values, image_grid_thw = self._preprocess( - image, - resample=resample, - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - data_format=data_format, - input_data_format=input_data_format, - ) - data["pixel_values"] = pixel_values - data["image_grid_thw"] = image_grid_thw - - if video is not None: - pixel_values_videos, video_grid_thw = self._preprocess( - video, - resample=resample, - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - data_format=data_format, - input_data_format=input_data_format, - ) - data["pixel_values_videos"] = pixel_values_videos - data["video_grid_thw"] = video_grid_thw + pixel_values, grid_thw = self._preprocess( + images, + min_pixels=min_pixels, + max_pixels=max_pixels, + image_mean=image_mean, + image_std=image_std, + rescale_factor=rescale_factor, + do_rescale=do_rescale, + do_normalize=do_normalize, + resample=resample, + data_format=data_format, + input_data_format=input_data_format, + ) + data = { + "pixel_values": pixel_values, + "grid_thw": grid_thw + } return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index 613ac619d5..60086fc82f 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -27,7 +27,7 @@ from paddleformers.transformers import AutoTokenizer -from .image_preprocessor import ImageProcessor +from .image_processor import ImageProcessor from .process_video import read_video_decord IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} @@ -41,30 +41,21 @@ class DataProcessor: def __init__( self, - tokenizer_name: str, - image_preprocessor_name: str, - spatial_conv_size: int = 2, - temporal_conv_size: int = 2, + model_path: str, image_min_pixels: int = 3136, image_max_pixels: int = 12845056, video_min_pixels: int = 3136, video_max_pixels: int = 12845056, - # video_target_frames: int = -1, - # video_frames_sample: str = "leading", - # video_max_frames: int = 180, - # 
video_min_frames: int = 16, - # video_fps: int = 2, **kwargs, ) -> None: # Tokenizer and image preprocessor - self.model_name_or_path = tokenizer_name - self._load_tokenizer() + self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) self.tokenizer.ignored_index = -100 - self.image_preprocessor = ImageProcessor.from_pretrained(image_preprocessor_name) + self.image_processor = ImageProcessor.from_pretrained(model_path) # Convolution sizes for patch aggregation - self.spatial_conv_size = spatial_conv_size - self.temporal_conv_size = temporal_conv_size + self.spatial_conv_size = self.image_processor.merge_size + self.temporal_conv_size = self.image_processor.temporal_patch_size # Pixel constraints self.image_min_pixels = image_min_pixels @@ -72,26 +63,15 @@ def __init__( self.video_min_pixels = video_min_pixels self.video_max_pixels = video_max_pixels - # Video sampling parameters - # self.target_frames = video_target_frames - # self.frames_sample = video_frames_sample - # self.max_frames = video_max_frames - # self.min_frames = video_min_frames - # self.fps = video_fps - # Special tokens and IDs - # self.cls_token = "<|im_start|>" - # self.eos_token = "<|im_end|>" - self.vision_start = "<|vision_start|>" - self.vision_end = "<|vision_end|>" self.image_token = "<|image_pad|>" self.video_token = "<|video_pad|>" self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token) self.video_token_id = self.tokenizer.convert_tokens_to_ids(self.video_token) + self.vision_start = "<|vision_start|>" self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start) - self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_end) self.role_prefixes = { "system": "", @@ -241,41 +221,35 @@ def _add_text(self, tokens, outputs: Dict) -> None: outputs["cur_position"] += len(tokens) def _add_image(self, img, outputs: Dict) -> None: - ret = self.image_preprocessor.preprocess( - image=[img.convert("RGB")], - input_data_format=ChannelDimension.LAST, - ) - num_tokens = ret["image_grid_thw"].prod() // self.image_preprocessor.merge_size**2 + ret = self.image_processor.preprocess(images=[img.convert("RGB")]) + num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 outputs["input_ids"].extend([self.image_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(ret["image_grid_thw"]) + outputs["grid_thw"].append(ret["grid_thw"]) outputs["image_type_ids"].append(0) - pos_ids = self._compute_3d_positions(1, ret["image_grid_thw"][1], ret["image_grid_thw"][2], outputs["cur_position"]) + pos_ids = self._compute_3d_positions(1, ret["grid_thw"][1], ret["grid_thw"][2], outputs["cur_position"]) outputs["position_ids"].extend(pos_ids) outputs["cur_position"] = np.max(pos_ids) + 1 def _add_video(self, frames, outputs: Dict) -> None: pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) - ret = self.image_preprocessor.preprocess( - video=pixel_stack, - input_data_format=ChannelDimension.LAST, - ) - num_tokens = ret["video_grid_thw"].prod() // self.image_preprocessor.merge_size**2 + ret = self.image_processor.preprocess(images=pixel_stack) + num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 outputs["input_ids"].extend([self.video_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) - outputs["images"].append(ret["pixel_values_videos"]) 
- outputs["grid_thw"].append(ret["video_grid_thw"]) - # outputs["pixel_values_videos"].append(ret["pixel_values_videos"]) - # outputs["video_grid_thw"].append(ret["video_grid_thw"]) - outputs["image_type_ids"].extend([1] * ret["video_grid_thw"][0]) + outputs["images"].append(ret["pixel_values"]) + outputs["grid_thw"].append(ret["grid_thw"]) + # outputs["pixel_values_videos"].append(ret["pixel_values"]) + # outputs["video_grid_thw"].append(ret["grid_thw"]) + outputs["image_type_ids"].extend([1] * ret["grid_thw"][0]) - pos_ids = self._compute_3d_positions(ret["video_grid_thw"][0], ret["video_grid_thw"][1], ret["video_grid_thw"][2], outputs["cur_position"]) + pos_ids = self._compute_3d_positions(ret["grid_thw"][0], ret["grid_thw"][1], ret["grid_thw"][2], outputs["cur_position"]) outputs["position_ids"].extend(pos_ids) outputs["cur_position"] = np.max(pos_ids) + 1 @@ -301,15 +275,6 @@ def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> List[ coords = list(zip(time_idx, h_idx, w_idx)) return [[start_idx + ti, start_idx + hi, start_idx + wi] for ti, hi, wi in coords] - def _load_tokenizer(self): - """ - load tokenizer - - Returns: - tokenizer (AutoTokenizer) - """ - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, padding_side="left", use_fast=True) - def apply_chat_template(self, request): """ Convert multi-turn messages into ID sequences. @@ -337,4 +302,3 @@ def apply_chat_template(self, request): f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" ) return token_ids - diff --git a/fastdeploy/input/mm_processor/process_video.py b/fastdeploy/input/mm_processor/process_video.py index 1fdfea1562..05a58fec7a 100644 --- a/fastdeploy/input/mm_processor/process_video.py +++ b/fastdeploy/input/mm_processor/process_video.py @@ -17,8 +17,6 @@ import io import os import decord -# from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename -# from .utils.video_utils import VideoReaderWrapper from tempfile import NamedTemporaryFile as ntf try: From be1bbf05543181f78d76fc6f1e4c55d16f6b56cf Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 18:31:51 +0800 Subject: [PATCH 05/16] add qwen25-vl processor position_ids --- fastdeploy/input/ernie_vl_processor.py | 10 +-- fastdeploy/input/mm_processor/process.py | 82 +++++++++++++++--------- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index ae8abac30b..6dba8e49ea 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -43,7 +43,7 @@ def __init__( image_preprocessor_name=preprocessor_path, **processor_kwargs, ) - + # self.ernie_processor.eval() self.image_patch_id = self.ernie_processor.image_token_id self.spatial_conv_size = self.ernie_processor.spatial_conv_size @@ -258,18 +258,12 @@ def pack_outputs(self, outs): outs["images"] = np.vstack(outs["images"]) outs["grid_thw"] = np.vstack(outs["grid_thw"]) outs["image_type_ids"] = np.array(outs["image_type_ids"]) - - # outs["images"] = np.array(outs["images"]) - # outs["grid_thw"] = np.array(outs["grid_thw"]) - # # outs["pixel_values_videos"] = np.array(outs["pixel_values_videos"]) - # # outs["video_grid_thw"] = np.array(outs["video_grid_thw"]) - # outs["image_type_ids"] = np.array(outs["image_type_ids"]) outs["image_patch_id"] = self.image_patch_id # Convert lists to arrays outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) outs["token_type_ids"] = 
np.array(outs["token_type_ids"], dtype=np.int64) - outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64) + outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) return outs diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index 60086fc82f..a181e6900e 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -46,6 +46,7 @@ def __init__( image_max_pixels: int = 12845056, video_min_pixels: int = 3136, video_max_pixels: int = 12845056, + tokens_per_second: int = 2, **kwargs, ) -> None: # Tokenizer and image preprocessor @@ -73,6 +74,8 @@ def __init__( self.vision_start = "<|vision_start|>" self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start) + self.tokens_per_second = tokens_per_second + self.role_prefixes = { "system": "", "user": "User: ", @@ -193,7 +196,7 @@ def request2ids( video_bytes = image_message.get("video") if video_bytes is None: continue - frames = self._load_and_process_video(video_bytes) + frames, meta = self._load_and_process_video(video_bytes) # ----------- # mm_parser = MultiModalPartParser() # fimg = mm_parser.parse_image("file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg") @@ -201,7 +204,7 @@ def request2ids( # frames[i] = fimg.copy() outputs["video_cnt"] += 1 - self._add_video(frames, outputs) + self._add_video(frames, meta, outputs) vision_message_index += 1 @@ -215,43 +218,73 @@ def _add_text(self, tokens, outputs: Dict) -> None: outputs["input_ids"].extend(tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) - start = outputs["cur_position"] - for i in range(len(tokens)): - outputs["position_ids"].append([start + i] * 3) - outputs["cur_position"] += len(tokens) + position_ids = self._compute_1d_positions(outputs["cur_position"], len(tokens)) + outputs["position_ids"].append(position_ids) + outputs["cur_position"] = position_ids.max() + 1 + + def _compute_1d_positions(self, start_pos: int, tokens_num: int) -> np.ndarray: + text_array = np.arange(tokens_num).reshape(1, -1) + text_index = np.broadcast_to(text_array, (3, tokens_num)) + position = text_index + start_pos + return position def _add_image(self, img, outputs: Dict) -> None: ret = self.image_processor.preprocess(images=[img.convert("RGB")]) num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 + grid_thw = ret["grid_thw"].tolist() outputs["input_ids"].extend([self.image_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(ret["grid_thw"]) + outputs["grid_thw"].append(grid_thw) outputs["image_type_ids"].append(0) - pos_ids = self._compute_3d_positions(1, ret["grid_thw"][1], ret["grid_thw"][2], outputs["cur_position"]) - outputs["position_ids"].extend(pos_ids) - outputs["cur_position"] = np.max(pos_ids) + 1 + t, h, w = grid_thw + position_ids = self._compute_3d_positions2(outputs["cur_position"], t,h,w, 0) + + outputs["position_ids"].append(position_ids) + outputs["cur_position"] = position_ids.max() + 1 - def _add_video(self, frames, outputs: Dict) -> None: + def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) ret = self.image_processor.preprocess(images=pixel_stack) + num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 + grid_thw = ret["grid_thw"].tolist() 
outputs["input_ids"].extend([self.video_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(ret["grid_thw"]) - # outputs["pixel_values_videos"].append(ret["pixel_values"]) - # outputs["video_grid_thw"].append(ret["grid_thw"]) - outputs["image_type_ids"].extend([1] * ret["grid_thw"][0]) + outputs["grid_thw"].append(grid_thw) + outputs["image_type_ids"].extend([1] * grid_thw[0]) + + fps = meta["fps"] + second_per_grid_t = self.temporal_conv_size / fps + t, h, w = grid_thw + position_ids = self._compute_3d_positions2(outputs["cur_position"], t,h,w, second_per_grid_t) + + outputs["position_ids"].append(position_ids) + outputs["cur_position"] = position_ids.max() + 1 + + def _compute_3d_positions2(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: + h //= self.spatial_conv_size + w //= self.spatial_conv_size - pos_ids = self._compute_3d_positions(ret["grid_thw"][0], ret["grid_thw"][1], ret["grid_thw"][2], outputs["cur_position"]) - outputs["position_ids"].extend(pos_ids) - outputs["cur_position"] = np.max(pos_ids) + 1 + tn = np.arange(t).reshape(-1, 1) + tn = np.broadcast_to(tn, (t, h * w)) + tn = tn * second_per_grid_t * self.tokens_per_second + t_index = tn.flatten() + + hn = np.arange(h).reshape(1, -1, 1) + h_index = np.broadcast_to(hn, (t, h, w)).flatten() + + wn = np.arange(w).reshape(1, 1, -1) + w_index = np.broadcast_to(wn, (t, h, w)).flatten() + + position = np.stack([t_index, h_index, w_index]) + start_pos + return position def _load_and_process_video(self, url: str) -> List[Image.Image]: reader, meta = read_video_decord(url) @@ -262,18 +295,7 @@ def _load_and_process_video(self, url: str) -> List[Image.Image]: image = Image.fromarray(frame, "RGB") frames.append(image) - return frames - - def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> List[List[int]]: - # Downsample time if needed - t_eff = t // self.temporal_conv_size if t != 1 else 1 - gh, gw = h // self.spatial_conv_size, w // self.spatial_conv_size - time_idx = np.repeat(np.arange(t_eff), gh * gw) - h_idx = np.tile(np.repeat(np.arange(gh), gw), t_eff) - w_idx = np.tile(np.arange(gw), t_eff * gh) - - coords = list(zip(time_idx, h_idx, w_idx)) - return [[start_idx + ti, start_idx + hi, start_idx + wi] for ti, hi, wi in coords] + return frames, meta def apply_chat_template(self, request): """ From 933a9ab8ba3c39ce2970e811b396619fc0311956 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 14 Aug 2025 21:35:23 +0800 Subject: [PATCH 06/16] add qwen25-vl processor --- fastdeploy/input/ernie_vl_processor.py | 48 +++++++------- fastdeploy/input/mm_processor/process.py | 66 +++++++++++-------- .../input/mm_processor/process_video.py | 49 ++++++++++++++ fastdeploy/input/preprocess.py | 3 +- 4 files changed, 113 insertions(+), 53 deletions(-) diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index 6dba8e49ea..c4ed4520be 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -21,6 +21,7 @@ from fastdeploy.input.ernie_processor import ErnieProcessor from fastdeploy.input.mm_processor import IDS_TYPE_FLAG, DataProcessor from fastdeploy.utils import data_processor_logger +from fastdeploy.engine.config import ModelConfig class ErnieMoEVLProcessor(ErnieProcessor): @@ -29,27 +30,22 @@ class ErnieMoEVLProcessor(ErnieProcessor): def __init__( self, model_name_or_path, + 
model_conf = None, limit_mm_per_prompt=None, mm_processor_kwargs=None, reasoning_parser_obj=None, ): data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") - tokenizer_path = model_name_or_path - preprocessor_path = model_name_or_path processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) + model_conf = ModelConfig({"model": model_name_or_path}) + self.ernie_processor = DataProcessor( - model_path=tokenizer_path, - image_preprocessor_name=preprocessor_path, + model_path=model_name_or_path, **processor_kwargs, ) - - # self.ernie_processor.eval() - self.image_patch_id = self.ernie_processor.image_token_id - self.spatial_conv_size = self.ernie_processor.spatial_conv_size - - self.decode_status = dict() self._load_tokenizer() + self.decode_status = dict() # Generation config try: @@ -60,7 +56,6 @@ def __init__( ) self.generation_config = None - # self.eos_token_ids = [self.tokenizer.eos_token_id] from paddleformers.trl.llm_utils import get_eos_token_id self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) @@ -108,11 +103,10 @@ def set_value(req, key, value): def process_request(self, request, max_model_len=None, **kwargs): """process the input data""" task = request.to_dict() - task["enable_thinking"] = kwargs.get("enable_thinking", True) + task["enable_thinking"] = kwargs.get("enable_thinking", False) self.process_request_dict(task, max_model_len) request = Request.from_dict(task) request = self._apply_default_parameters(request) - return request def _parse_processor_kwargs(self, kwargs): @@ -127,17 +121,8 @@ def _parse_processor_kwargs(self, kwargs): # 验证参数类型 data_processor_logger.info(f"kwargs:{kwargs}") expected_types = { - "spatial_conv_size": int, - "temporal_conv_size": int, - "image_min_pixels": int, - "image_max_pixels": int, - "video_min_pixels": int, - "video_max_pixels": int, - "video_target_frames": int, - "video_frames_sample": str, "video_max_frames": int, "video_min_frames": int, - "video_fps": int, } for key, value in kwargs.items(): @@ -259,12 +244,13 @@ def pack_outputs(self, outs): outs["grid_thw"] = np.vstack(outs["grid_thw"]) outs["image_type_ids"] = np.array(outs["image_type_ids"]) - outs["image_patch_id"] = self.image_patch_id + outs["image_patch_id"] = self.ernie_processor.image_token_id + outs["video_patch_id"] = self.ernie_processor.video_token_id + # Convert lists to arrays outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) - return outs def process_response_dict(self, response_dict, stream, **kwargs): @@ -284,3 +270,17 @@ def process_response_dict(self, response_dict, stream, **kwargs): return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) else: return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) + + def update_stop_seq(self, stop_sequences): + """ + Update stop sequences from request. 
+ """ + stop_seqs = [] + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + for seq in stop_sequences: + if seq != self.tokenizer.eos_token_id: + stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) + stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) + data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") + return stop_seqs, stop_seqs_len \ No newline at end of file diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index a181e6900e..4bcc7e3c06 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -17,18 +17,14 @@ """ process.py """ from typing import Any, Dict, List, Union - import numpy as np -from paddleformers.transformers.image_utils import ChannelDimension from PIL import Image - +from paddleformers.transformers import AutoTokenizer from fastdeploy.entrypoints.chat_utils import parse_chat_messages from fastdeploy.utils import data_processor_logger -from paddleformers.transformers import AutoTokenizer - from .image_processor import ImageProcessor -from .process_video import read_video_decord +from .process_video import read_video_decord, sample_frames IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} @@ -42,13 +38,14 @@ class DataProcessor: def __init__( self, model_path: str, - image_min_pixels: int = 3136, - image_max_pixels: int = 12845056, - video_min_pixels: int = 3136, - video_max_pixels: int = 12845056, + video_min_frames: int = 4, + video_max_frames: int = 768, tokens_per_second: int = 2, **kwargs, ) -> None: + self.min_frames = video_min_frames + self.max_frames = video_max_frames + # Tokenizer and image preprocessor self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) self.tokenizer.ignored_index = -100 @@ -58,12 +55,6 @@ def __init__( self.spatial_conv_size = self.image_processor.merge_size self.temporal_conv_size = self.image_processor.temporal_patch_size - # Pixel constraints - self.image_min_pixels = image_min_pixels - self.image_max_pixels = image_max_pixels - self.video_min_pixels = video_min_pixels - self.video_max_pixels = video_max_pixels - # Special tokens and IDs self.image_token = "<|image_pad|>" self.video_token = "<|video_pad|>" @@ -196,7 +187,7 @@ def request2ids( video_bytes = image_message.get("video") if video_bytes is None: continue - frames, meta = self._load_and_process_video(video_bytes) + frames, meta = self._load_and_process_video(video_bytes, image_message) # ----------- # mm_parser = MultiModalPartParser() # fimg = mm_parser.parse_image("file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg") @@ -221,17 +212,17 @@ def _add_text(self, tokens, outputs: Dict) -> None: position_ids = self._compute_1d_positions(outputs["cur_position"], len(tokens)) outputs["position_ids"].append(position_ids) outputs["cur_position"] = position_ids.max() + 1 - - def _compute_1d_positions(self, start_pos: int, tokens_num: int) -> np.ndarray: - text_array = np.arange(tokens_num).reshape(1, -1) - text_index = np.broadcast_to(text_array, (3, tokens_num)) + + def _compute_1d_positions(self, start_pos: int, num_tokens: int) -> np.ndarray: + text_array = np.arange(num_tokens).reshape(1, -1) + text_index = np.broadcast_to(text_array, (3, num_tokens)) position = text_index + start_pos return position def _add_image(self, img, outputs: Dict) -> None: ret = 
self.image_processor.preprocess(images=[img.convert("RGB")]) num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 - grid_thw = ret["grid_thw"].tolist() + grid_thw = ret["grid_thw"].tolist() outputs["input_ids"].extend([self.image_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) @@ -247,11 +238,10 @@ def _add_image(self, img, outputs: Dict) -> None: outputs["cur_position"] = position_ids.max() + 1 def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: - pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) - ret = self.image_processor.preprocess(images=pixel_stack) + ret = self.image_processor.preprocess(images=frames) num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 - grid_thw = ret["grid_thw"].tolist() + grid_thw = ret["grid_thw"].tolist() outputs["input_ids"].extend([self.video_token_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) @@ -260,14 +250,14 @@ def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: outputs["grid_thw"].append(grid_thw) outputs["image_type_ids"].extend([1] * grid_thw[0]) - fps = meta["fps"] + fps = meta["fps"] second_per_grid_t = self.temporal_conv_size / fps t, h, w = grid_thw position_ids = self._compute_3d_positions2(outputs["cur_position"], t,h,w, second_per_grid_t) outputs["position_ids"].append(position_ids) outputs["cur_position"] = position_ids.max() + 1 - + def _compute_3d_positions2(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: h //= self.spatial_conv_size w //= self.spatial_conv_size @@ -286,7 +276,7 @@ def _compute_3d_positions2(self, start_pos: int, t: int, h: int, w: int, second_ position = np.stack([t_index, h_index, w_index]) + start_pos return position - def _load_and_process_video(self, url: str) -> List[Image.Image]: + def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: reader, meta = read_video_decord(url) frames = [] @@ -294,6 +284,26 @@ def _load_and_process_video(self, url: str) -> List[Image.Image]: frame = reader[i].asnumpy() image = Image.fromarray(frame, "RGB") frames.append(image) + frames = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + + fps = item.get("fps", None) + num_frames = item.get("target_frames", None) + if fps is not None or num_frames is not None: + min_frames = item.get("min_frames", self.min_frames) + max_frames = item.get("max_frames", self.max_frames) + frames = sample_frames(video=frames, + frame_factor=self.temporal_conv_size, + min_frames=min_frames, + max_frames=max_frames, + metadata=meta, + fps=fps, + num_frames=num_frames) + + meta["num_of_frame"] = frames.shape[0] + if fps is not None: + meta["fps"] = fps + else: + meta["fps"] = frames.shape[0] / meta["duration"] return frames, meta diff --git a/fastdeploy/input/mm_processor/process_video.py b/fastdeploy/input/mm_processor/process_video.py index 05a58fec7a..9dd79cf97f 100644 --- a/fastdeploy/input/mm_processor/process_video.py +++ b/fastdeploy/input/mm_processor/process_video.py @@ -18,6 +18,9 @@ import os import decord from tempfile import NamedTemporaryFile as ntf +from typing import Union, Optional +import numpy as np +import math try: # moviepy 1.0 @@ -99,3 +102,49 @@ def read_video_decord(video_path): video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} return video_reader, video_meta + + +def sample_frames( + video: np.ndarray, + frame_factor: int, + min_frames: int, + max_frames: int, + 
metadata: Optional[dict] = None, + fps: Optional[Union[int, float]] = None, + num_frames: Optional[int] = None, +): + if fps is not None and num_frames is not None: + raise ValueError("`num_frames` and `fps` are mutually exclusive arguments, please use only one!") + + if fps is None and num_frames is None: + return video + + total_num_frames = video.shape[0] + + # If num_frames is not given but fps is, calculate num_frames from fps + if num_frames is not None: + num_frames = round(num_frames / frame_factor) * frame_factor + elif fps is not None: + if metadata is None: + raise ValueError( + "Asked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. " + "Please pass in `VideoMetadata` object or use a fixed `num_frames` per input video" + ) + max_frames = math.floor(min(max_frames, total_num_frames) / frame_factor) * frame_factor + num_frames = total_num_frames / metadata["fps"] * fps + num_frames = min(min(max(num_frames, min_frames), max_frames), total_num_frames) + num_frames = math.floor(num_frames / frame_factor) * frame_factor + + if num_frames > total_num_frames: + raise ValueError( + f"Video can't be sampled. The inferred `num_frames={num_frames}` exceeds `total_num_frames={total_num_frames}`. " + "Decrease `num_frames` or `fps` for sampling." + ) + + if num_frames is not None: + indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype("int") + else: + indices = np.arange(0, total_num_frames).astype("int") + video = video[indices] + + return video diff --git a/fastdeploy/input/preprocess.py b/fastdeploy/input/preprocess.py index 120be9ce88..e2ef518d23 100644 --- a/fastdeploy/input/preprocess.py +++ b/fastdeploy/input/preprocess.py @@ -70,7 +70,8 @@ def create_processor(self): reasoning_parser_obj = None if self.reasoning_parser: reasoning_parser_obj = ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) - architectures = ModelConfig({"model": self.model_name_or_path}).architectures[0] + model_conf = ModelConfig({"model": self.model_name_or_path}) + architectures = model_conf.architectures[0] if not self.enable_mm: if not ErnieArchitectures.contains_ernie_arch(architectures): from fastdeploy.input.text_processor import DataProcessor From a7173e09a189314fd199039ee9f28ab519d9f7d6 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Fri, 15 Aug 2025 12:34:00 +0800 Subject: [PATCH 07/16] add qwen25-vl processor --- fastdeploy/input/ernie_vl_processor.py | 48 +- fastdeploy/input/mm_processor/__init__.py | 3 +- .../image_preprocessor/__init__.py | 0 .../get_image_preprocessor.py | 0 .../image_preprocessor_adaptive.py | 0 fastdeploy/input/mm_processor/process.py | 454 +++++++++---- .../input/mm_processor/process_video.py | 269 ++++---- .../mm_processor/tokenizer/__init__.py | 0 .../mm_processor/tokenizer/tokenizer_vl.py | 0 .../mm_processor/utils/Roboto-Regular.ttf | Bin .../mm_processor/utils/__init__.py | 0 .../mm_processor/utils/io_utils.py | 0 .../mm_processor/utils/render_timestamp.py | 0 .../mm_processor/utils/video_utils.py | 0 fastdeploy/input/preprocess.py | 21 +- .../qwen_mm_processor}/__init__.py | 3 +- .../image_processor.py | 0 fastdeploy/input/qwen_mm_processor/process.py | 340 ++++++++++ .../input/qwen_mm_processor/process_video.py | 150 +++++ .../qwen_vl_processor.py} | 51 +- fastdeploy/input2/__init__.py | 15 - fastdeploy/input2/ernie_processor.py | 425 ------------- fastdeploy/input2/ernie_tokenizer.py | 394 ------------ fastdeploy/input2/mm_processor/process.py | 512 
--------------- .../input2/mm_processor/process_video.py | 205 ------ fastdeploy/input2/preprocess.py | 101 --- fastdeploy/input2/text_processor.py | 602 ------------------ 27 files changed, 1034 insertions(+), 2559 deletions(-) rename fastdeploy/{input2 => input}/mm_processor/image_preprocessor/__init__.py (100%) rename fastdeploy/{input2 => input}/mm_processor/image_preprocessor/get_image_preprocessor.py (100%) rename fastdeploy/{input2 => input}/mm_processor/image_preprocessor/image_preprocessor_adaptive.py (100%) rename fastdeploy/{input2 => input}/mm_processor/tokenizer/__init__.py (100%) rename fastdeploy/{input2 => input}/mm_processor/tokenizer/tokenizer_vl.py (100%) rename fastdeploy/{input2 => input}/mm_processor/utils/Roboto-Regular.ttf (100%) rename fastdeploy/{input2 => input}/mm_processor/utils/__init__.py (100%) rename fastdeploy/{input2 => input}/mm_processor/utils/io_utils.py (100%) rename fastdeploy/{input2 => input}/mm_processor/utils/render_timestamp.py (100%) rename fastdeploy/{input2 => input}/mm_processor/utils/video_utils.py (100%) rename fastdeploy/{input2/mm_processor => input/qwen_mm_processor}/__init__.py (89%) rename fastdeploy/input/{mm_processor => qwen_mm_processor}/image_processor.py (100%) create mode 100644 fastdeploy/input/qwen_mm_processor/process.py create mode 100644 fastdeploy/input/qwen_mm_processor/process_video.py rename fastdeploy/{input2/ernie_vl_processor.py => input/qwen_vl_processor.py} (90%) delete mode 100644 fastdeploy/input2/__init__.py delete mode 100644 fastdeploy/input2/ernie_processor.py delete mode 100644 fastdeploy/input2/ernie_tokenizer.py delete mode 100644 fastdeploy/input2/mm_processor/process.py delete mode 100644 fastdeploy/input2/mm_processor/process_video.py delete mode 100644 fastdeploy/input2/preprocess.py delete mode 100644 fastdeploy/input2/text_processor.py diff --git a/fastdeploy/input/ernie_vl_processor.py b/fastdeploy/input/ernie_vl_processor.py index c4ed4520be..365d93f481 100644 --- a/fastdeploy/input/ernie_vl_processor.py +++ b/fastdeploy/input/ernie_vl_processor.py @@ -21,7 +21,6 @@ from fastdeploy.input.ernie_processor import ErnieProcessor from fastdeploy.input.mm_processor import IDS_TYPE_FLAG, DataProcessor from fastdeploy.utils import data_processor_logger -from fastdeploy.engine.config import ModelConfig class ErnieMoEVLProcessor(ErnieProcessor): @@ -30,22 +29,26 @@ class ErnieMoEVLProcessor(ErnieProcessor): def __init__( self, model_name_or_path, - model_conf = None, limit_mm_per_prompt=None, mm_processor_kwargs=None, reasoning_parser_obj=None, ): data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") + tokenizer_path = model_name_or_path + preprocessor_path = model_name_or_path processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) - model_conf = ModelConfig({"model": model_name_or_path}) - self.ernie_processor = DataProcessor( - model_path=model_name_or_path, + tokenizer_name=tokenizer_path, + image_preprocessor_name=preprocessor_path, **processor_kwargs, ) - self._load_tokenizer() + self.ernie_processor.eval() + self.image_patch_id = self.ernie_processor.image_patch_id + self.spatial_conv_size = self.ernie_processor.spatial_conv_size + self.decode_status = dict() + self._load_tokenizer() # Generation config try: @@ -56,6 +59,7 @@ def __init__( ) self.generation_config = None + # self.eos_token_ids = [self.tokenizer.eos_token_id] from paddleformers.trl.llm_utils import get_eos_token_id self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) @@ -103,10 
+107,11 @@ def set_value(req, key, value): def process_request(self, request, max_model_len=None, **kwargs): """process the input data""" task = request.to_dict() - task["enable_thinking"] = kwargs.get("enable_thinking", False) + task["enable_thinking"] = kwargs.get("enable_thinking", True) self.process_request_dict(task, max_model_len) request = Request.from_dict(task) request = self._apply_default_parameters(request) + return request def _parse_processor_kwargs(self, kwargs): @@ -121,8 +126,17 @@ def _parse_processor_kwargs(self, kwargs): # 验证参数类型 data_processor_logger.info(f"kwargs:{kwargs}") expected_types = { + "spatial_conv_size": int, + "temporal_conv_size": int, + "image_min_pixels": int, + "image_max_pixels": int, + "video_min_pixels": int, + "video_max_pixels": int, + "video_target_frames": int, + "video_frames_sample": str, "video_max_frames": int, "video_min_frames": int, + "video_fps": int, } for key, value in kwargs.items(): @@ -244,13 +258,11 @@ def pack_outputs(self, outs): outs["grid_thw"] = np.vstack(outs["grid_thw"]) outs["image_type_ids"] = np.array(outs["image_type_ids"]) - outs["image_patch_id"] = self.ernie_processor.image_token_id - outs["video_patch_id"] = self.ernie_processor.video_token_id - + outs["image_patch_id"] = self.image_patch_id # Convert lists to arrays outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) - outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) + outs["position_ids"] = np.concatenate(outs["position_ids"], axis=0) return outs def process_response_dict(self, response_dict, stream, **kwargs): @@ -270,17 +282,3 @@ def process_response_dict(self, response_dict, stream, **kwargs): return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) else: return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) - - def update_stop_seq(self, stop_sequences): - """ - Update stop sequences from request. - """ - stop_seqs = [] - if isinstance(stop_sequences, str): - stop_sequences = [stop_sequences] - for seq in stop_sequences: - if seq != self.tokenizer.eos_token_id: - stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) - stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) - data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") - return stop_seqs, stop_seqs_len \ No newline at end of file diff --git a/fastdeploy/input/mm_processor/__init__.py b/fastdeploy/input/mm_processor/__init__.py index 5a97e41863..ba59bc1654 100644 --- a/fastdeploy/input/mm_processor/__init__.py +++ b/fastdeploy/input/mm_processor/__init__.py @@ -14,9 +14,10 @@ # limitations under the License. 
""" -from .process import IDS_TYPE_FLAG, DataProcessor +from .process import IDS_TYPE_FLAG, DataProcessor, fancy_print __all__ = [ "DataProcessor", + "fancy_print", "IDS_TYPE_FLAG", ] diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/__init__.py b/fastdeploy/input/mm_processor/image_preprocessor/__init__.py similarity index 100% rename from fastdeploy/input2/mm_processor/image_preprocessor/__init__.py rename to fastdeploy/input/mm_processor/image_preprocessor/__init__.py diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py b/fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py similarity index 100% rename from fastdeploy/input2/mm_processor/image_preprocessor/get_image_preprocessor.py rename to fastdeploy/input/mm_processor/image_preprocessor/get_image_preprocessor.py diff --git a/fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py b/fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py similarity index 100% rename from fastdeploy/input2/mm_processor/image_preprocessor/image_preprocessor_adaptive.py rename to fastdeploy/input/mm_processor/image_preprocessor/image_preprocessor_adaptive.py diff --git a/fastdeploy/input/mm_processor/process.py b/fastdeploy/input/mm_processor/process.py index 4bcc7e3c06..ea2559a0fe 100644 --- a/fastdeploy/input/mm_processor/process.py +++ b/fastdeploy/input/mm_processor/process.py @@ -16,57 +16,127 @@ """ """ process.py """ +import copy +import os +from collections import defaultdict from typing import Any, Dict, List, Union + import numpy as np +from paddleformers.transformers.image_utils import ChannelDimension from PIL import Image -from paddleformers.transformers import AutoTokenizer + from fastdeploy.entrypoints.chat_utils import parse_chat_messages +from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer from fastdeploy.utils import data_processor_logger -from .image_processor import ImageProcessor -from .process_video import read_video_decord, sample_frames +from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor +from .process_video import read_frames_decord, read_video_decord +from .utils.render_timestamp import render_frame_timestamp IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} +def fancy_print(input_ids, tokenizer, image_patch_id=None): + """ + input_ids: input_ids + tokenizer: the tokenizer of models + """ + i = 0 + res = "" + text_ids = [] + real_image_token_len = 0 + while i < len(input_ids): + if input_ids[i] == image_patch_id: + if len(text_ids) > 0: + res += tokenizer.decode(text_ids) + text_ids = [] + + real_image_token_len += 1 + else: + if real_image_token_len != 0: + res += f"<|IMAGE@{real_image_token_len}|>" + real_image_token_len = 0 + + text_ids.append(input_ids[i]) + + i += 1 + if len(text_ids) > 0: + + res += tokenizer.decode(text_ids) + text_ids = [] + return res + + class DataProcessor: """ Processes multimodal chat messages into model-ready inputs, handling text, images, and videos with 3D positional embeddings. 
""" + CLS_TOKEN = "<|begin_of_sentence|>" + SEP_TOKEN = "<|end_of_sentence|>" + EOS_TOKEN = "" + IMG_START = "<|IMAGE_START|>" + IMG_END = "<|IMAGE_END|>" + VID_START = "<|VIDEO_START|>" + VID_END = "<|VIDEO_END|>" + def __init__( self, - model_path: str, - video_min_frames: int = 4, - video_max_frames: int = 768, - tokens_per_second: int = 2, + tokenizer_name: str, + image_preprocessor_name: str, + spatial_conv_size: int = 2, + temporal_conv_size: int = 2, + image_min_pixels: int = 4 * 28 * 28, + image_max_pixels: int = 6177 * 28 * 28, + video_min_pixels: int = 299 * 28 * 28, + video_max_pixels: int = 1196 * 28 * 28, + video_target_frames: int = -1, + video_frames_sample: str = "leading", + video_max_frames: int = 180, + video_min_frames: int = 16, + video_fps: int = 2, **kwargs, ) -> None: - self.min_frames = video_min_frames - self.max_frames = video_max_frames - # Tokenizer and image preprocessor - self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) + self.model_name_or_path = tokenizer_name + self._load_tokenizer() self.tokenizer.ignored_index = -100 - self.image_processor = ImageProcessor.from_pretrained(model_path) + self.image_preprocessor = AdaptiveImageProcessor.from_pretrained(image_preprocessor_name) # Convolution sizes for patch aggregation - self.spatial_conv_size = self.image_processor.merge_size - self.temporal_conv_size = self.image_processor.temporal_patch_size + self.spatial_conv_size = spatial_conv_size + self.temporal_conv_size = temporal_conv_size + + # Pixel constraints + self.image_min_pixels = image_min_pixels + self.image_max_pixels = image_max_pixels + self.video_min_pixels = video_min_pixels + self.video_max_pixels = video_max_pixels + + # Video sampling parameters + self.target_frames = video_target_frames + self.frames_sample = video_frames_sample + self.max_frames = video_max_frames + self.min_frames = video_min_frames + self.fps = video_fps # Special tokens and IDs - self.image_token = "<|image_pad|>" - self.video_token = "<|video_pad|>" - - self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token) - self.video_token_id = self.tokenizer.convert_tokens_to_ids(self.video_token) - - self.vision_start = "<|vision_start|>" - self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start) - - self.tokens_per_second = tokens_per_second - + self.cls_token = self.CLS_TOKEN + self.sep_token = self.SEP_TOKEN + self.eos_token = self.EOS_TOKEN + self.image_start = self.IMG_START + self.image_end = self.IMG_END + self.video_start = self.VID_START + self.video_end = self.VID_END + self.image_patch_id = self.tokenizer.convert_tokens_to_ids("<|IMAGE_PLACEHOLDER|>") + self.image_start_id = self.tokenizer.convert_tokens_to_ids(self.image_start) + self.video_start_id = self.tokenizer.convert_tokens_to_ids(self.video_start) + self.sep_token_id = self.tokenizer.convert_tokens_to_ids(self.sep_token) + self.eos_token_id = self.tokenizer.convert_tokens_to_ids(self.eos_token) + + self.token_type_mapping = self._build_token_type_mapping() + self.is_training = True self.role_prefixes = { "system": "", "user": "User: ", @@ -74,6 +144,26 @@ def __init__( "assistant": "Assistant: ", } + def _build_token_type_mapping(self) -> Dict[Any, int]: + mapping = defaultdict(lambda: IDS_TYPE_FLAG["text"]) + for token in ( + self.IMG_START, + self.IMG_END, + self.VID_START, + self.VID_END, + ): + mapping[token] = IDS_TYPE_FLAG["image"] + mapping[self.image_patch_id] = IDS_TYPE_FLAG["image"] + return mapping + + def 
train(self) -> None: + """Enable training mode (produces labels).""" + self.is_training = True + + def eval(self) -> None: + """Enable evaluation mode (doesn't produce labels).""" + self.is_training = False + def text2ids(self, text, images=None, videos=None): """ Convert chat text into model inputs. @@ -161,151 +251,235 @@ def request2ids( "video", ]: image_message_list.append(item) - request["messages"] = messages prompt_token_ids = self.apply_chat_template(request) if len(prompt_token_ids) == 0: raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - - vision_start_index = 0 - vision_message_index = 0 + image_start_index = 0 + image_message_index = 0 for i in range(len(prompt_token_ids)): - if prompt_token_ids[i] == self.vision_start_id : - self._add_text(prompt_token_ids[vision_start_index : i + 1], outputs) - - vision_start_index = i + 1 - image_message = image_message_list[vision_message_index] - + if prompt_token_ids[i] in [ + self.image_start_id, + self.video_start_id, + ]: + self._add_text(prompt_token_ids[image_start_index : i + 1], outputs) + image_start_index = i + 1 + image_message = image_message_list[image_message_index] if image_message["type"] == "image": img = image_message.get("image") if img is None: continue outputs["pic_cnt"] += 1 self._add_image(img, outputs) - elif image_message["type"] == "video": video_bytes = image_message.get("video") if video_bytes is None: continue - frames, meta = self._load_and_process_video(video_bytes, image_message) - # ----------- - # mm_parser = MultiModalPartParser() - # fimg = mm_parser.parse_image("file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg") - # for i in range(len(frames)): - # frames[i] = fimg.copy() - + frames = self._load_and_process_video(video_bytes, image_message) outputs["video_cnt"] += 1 - self._add_video(frames, meta, outputs) + self._add_video(frames, outputs) + image_message_index += 1 + self._add_text(prompt_token_ids[image_start_index:], outputs) - vision_message_index += 1 - - self._add_text(prompt_token_ids[vision_start_index:], outputs) + if self.is_training: + assert tgts, "training must give tgt !" 
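# --- [Editor's note: illustrative sketch, not part of this patch; the token ids
# below are made up, only ignored_index = -100 and the "target span ends at the
# sep token" convention come from this file.]
# _extract_labels (defined below) masks every position with -100 except the target
# tokens that immediately precede a sep token; the sep position itself is labeled
# with the EOS id.
ignored_index, eos_id, sep_id = -100, 3, 2
toy_input_ids = [10, 11, 12, 901, 902, sep_id]    # ...prompt..., tgt, tgt, sep
toy_tgt_ids = [901, 902]                          # tokenized tgts[0]
toy_labels = [ignored_index] * len(toy_input_ids)
sep_pos = toy_input_ids.index(sep_id)
toy_labels[sep_pos - len(toy_tgt_ids):sep_pos] = toy_tgt_ids
toy_labels[sep_pos] = eos_id
assert toy_labels == [-100, -100, -100, 901, 902, 3]
# --- [End editor's note] ---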
+ self._extract_labels(outputs, tgts) return outputs + def _add_special_token(self, token: Union[str, int], outputs: Dict) -> None: + token_id = token if isinstance(token, int) else self.tokenizer.convert_tokens_to_ids(token) + outputs["input_ids"].append(token_id) + outputs["token_type_ids"].append(self.token_type_mapping[token]) + pos = outputs["cur_position"] + outputs["position_ids"].append([pos] * 3) + outputs["cur_position"] += 1 + def _add_text(self, tokens, outputs: Dict) -> None: if isinstance(tokens, str): tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] - outputs["input_ids"].extend(tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) - position_ids = self._compute_1d_positions(outputs["cur_position"], len(tokens)) - outputs["position_ids"].append(position_ids) - outputs["cur_position"] = position_ids.max() + 1 - - def _compute_1d_positions(self, start_pos: int, num_tokens: int) -> np.ndarray: - text_array = np.arange(num_tokens).reshape(1, -1) - text_index = np.broadcast_to(text_array, (3, num_tokens)) - position = text_index + start_pos - return position + start = outputs["cur_position"] + for i in range(len(tokens)): + outputs["position_ids"].append([start + i] * 3) + outputs["cur_position"] += len(tokens) def _add_image(self, img, outputs: Dict) -> None: - ret = self.image_processor.preprocess(images=[img.convert("RGB")]) - num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 - grid_thw = ret["grid_thw"].tolist() - - outputs["input_ids"].extend([self.image_token_id] * num_tokens) + patches_h, patches_w = self.image_preprocessor.get_smarted_resize( + img.height, + img.width, + min_pixels=self.image_min_pixels, + max_pixels=self.image_max_pixels, + )[1] + num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2) + + outputs["input_ids"].extend([self.image_patch_id] * num_tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) + pos_ids = self._compute_3d_positions(1, patches_h, patches_w, outputs["cur_position"]) + outputs["position_ids"].extend(pos_ids) + outputs["cur_position"] = np.max(pos_ids) + 1 + + # Preprocess pixels + ret = self.image_preprocessor.preprocess( + images=[img.convert("RGB")], + do_normalize=False, + do_rescale=False, + predetermined_grid_thw=np.array([[patches_h, patches_w]]), + do_convert_rgb=True, + input_data_format=ChannelDimension.LAST, + ) outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(grid_thw) + outputs["grid_thw"].append(ret["image_grid_thw"]) outputs["image_type_ids"].append(0) - t, h, w = grid_thw - position_ids = self._compute_3d_positions2(outputs["cur_position"], t,h,w, 0) - - outputs["position_ids"].append(position_ids) - outputs["cur_position"] = position_ids.max() + 1 + def _add_video(self, frames, outputs: Dict) -> None: + patches_h, patches_w = self.image_preprocessor.get_smarted_resize( + frames[0].height, + frames[0].width, + min_pixels=self.video_min_pixels, + max_pixels=self.video_max_pixels, + )[1] + num_frames = len(frames) + num_tokens = (num_frames * patches_h * patches_w) // (self.spatial_conv_size**2 * self.temporal_conv_size) + + pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + ret = self.image_preprocessor.preprocess( + images=None, + videos=pixel_stack, + do_normalize=False, + do_rescale=False, + predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), + do_convert_rgb=True, + input_data_format=ChannelDimension.LAST, + ) + 
outputs["images"].append(ret["pixel_values_videos"]) + outputs["grid_thw"].append(ret["video_grid_thw"]) + outputs["image_type_ids"].extend([1] * num_frames) - def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: - ret = self.image_processor.preprocess(images=frames) + outputs["input_ids"].extend([self.image_patch_id] * num_tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) - num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 - grid_thw = ret["grid_thw"].tolist() + pos_ids = self._compute_3d_positions(num_frames, patches_h, patches_w, outputs["cur_position"]) + outputs["position_ids"].extend(pos_ids) + outputs["cur_position"] = np.max(pos_ids) + 1 + + def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None: + input_ids = copy.deepcopy(outputs["input_ids"]) + labels = [self.tokenizer.ignored_index] * len(input_ids) + + tgt_count = input_ids.count(self.sep_token_id) + assert tgt_count == len(tgts), f"len(tgts) != len(src) {len(tgts)} vs {tgt_count}" + + tgt_index = 0 + for i, token_id in enumerate(input_ids): + if token_id == self.sep_token_id: + labels_token = self.tokenizer.tokenize(tgts[tgt_index]) + labels_token_id = self.tokenizer.convert_tokens_to_ids(labels_token) + labels[i - len(labels_token_id) : i] = labels_token_id + labels[i] = self.eos_token_id # + tgt_index += 1 + + outputs["labels"] = labels + + def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: + reader, meta, path = read_video_decord(url, save_to_disk=False) + + video_frame_args = dict() + video_frame_args["fps"] = item.get("fps", self.fps) + video_frame_args["min_frames"] = item.get("min_frames", self.min_frames) + video_frame_args["max_frames"] = item.get("max_frames", self.max_frames) + video_frame_args["target_frames"] = item.get("target_frames", self.target_frames) + video_frame_args["frames_sample"] = item.get("frames_sample", self.frames_sample) + + video_frame_args = self._set_video_frame_args(video_frame_args, meta) + + frames_data, _, timestamps = read_frames_decord( + path, + reader, + meta, + target_frames=video_frame_args["target_frames"], + target_fps=video_frame_args["fps"], + frames_sample=video_frame_args["frames_sample"], + save_to_disk=False, + ) - outputs["input_ids"].extend([self.video_token_id] * num_tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) + frames: List[Image.Image] = [] + for img_array, ts in zip(frames_data, timestamps): + frames.append(render_frame_timestamp(img_array, ts)) + # Ensure even number of frames for temporal conv + if len(frames) % 2 != 0: + frames.append(copy.deepcopy(frames[-1])) + return frames - outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(grid_thw) - outputs["image_type_ids"].extend([1] * grid_thw[0]) - - fps = meta["fps"] - second_per_grid_t = self.temporal_conv_size / fps - t, h, w = grid_thw - position_ids = self._compute_3d_positions2(outputs["cur_position"], t,h,w, second_per_grid_t) - - outputs["position_ids"].append(position_ids) - outputs["cur_position"] = position_ids.max() + 1 - - def _compute_3d_positions2(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: - h //= self.spatial_conv_size - w //= self.spatial_conv_size - - tn = np.arange(t).reshape(-1, 1) - tn = np.broadcast_to(tn, (t, h * w)) - tn = tn * second_per_grid_t * self.tokens_per_second - t_index = tn.flatten() - - hn = np.arange(h).reshape(1, -1, 1) - h_index = np.broadcast_to(hn, (t, h, w)).flatten() - - 
wn = np.arange(w).reshape(1, 1, -1) - w_index = np.broadcast_to(wn, (t, h, w)).flatten() - - position = np.stack([t_index, h_index, w_index]) + start_pos - return position - - def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: - reader, meta = read_video_decord(url) - - frames = [] - for i in range(meta["num_of_frame"]): - frame = reader[i].asnumpy() - image = Image.fromarray(frame, "RGB") - frames.append(image) - frames = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) - - fps = item.get("fps", None) - num_frames = item.get("target_frames", None) - if fps is not None or num_frames is not None: - min_frames = item.get("min_frames", self.min_frames) - max_frames = item.get("max_frames", self.max_frames) - frames = sample_frames(video=frames, - frame_factor=self.temporal_conv_size, - min_frames=min_frames, - max_frames=max_frames, - metadata=meta, - fps=fps, - num_frames=num_frames) - - meta["num_of_frame"] = frames.shape[0] - if fps is not None: - meta["fps"] = fps - else: - meta["fps"] = frames.shape[0] / meta["duration"] + def _set_video_frame_args(self, video_frame_args, video_meta): + """ + 根据已知参数和优先级,设定最终的抽帧参数 + """ + # 优先级:video_target_frames > (video_min_frames, video_max_frames) > video_fps + if video_frame_args["target_frames"] > 0: + if video_frame_args["fps"] >= 0: + raise ValueError("fps must be negative if target_frames is given") + if ( + video_frame_args["min_frames"] > 0 + and video_frame_args["target_frames"] < video_frame_args["min_frames"] + ): + raise ValueError("target_frames must be larger than min_frames") + if ( + video_frame_args["max_frames"] > 0 + and video_frame_args["target_frames"] > video_frame_args["max_frames"] + ): + raise ValueError("target_frames must be smaller than max_frames") + else: + if video_frame_args["fps"] < 0: + raise ValueError("Must provide either positive target_fps or positive target_frames.") + # 先计算在video_fps下抽到的帧数 + frames_to_extract = int(video_meta["duration"] * video_frame_args["fps"]) + # 判断是否在目标区间内,如果不是,则取target_frames为上界或下界 + if ( + video_frame_args["min_frames"] > 0 + and video_frame_args["max_frames"] > 0 + and video_frame_args["min_frames"] > video_frame_args["max_frames"] + ): + raise ValueError("min_frames must be smaller than max_frames") + if video_frame_args["min_frames"] > 0 and frames_to_extract < video_frame_args["min_frames"]: + video_frame_args["target_frames"] = video_frame_args["min_frames"] + video_frame_args["fps"] = -1 + if video_frame_args["max_frames"] > 0 and frames_to_extract > video_frame_args["max_frames"]: + video_frame_args["target_frames"] = video_frame_args["max_frames"] + video_frame_args["fps"] = -1 + + return video_frame_args + + def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> List[List[int]]: + # Downsample time if needed + t_eff = t // self.temporal_conv_size if t != 1 else 1 + gh, gw = h // self.spatial_conv_size, w // self.spatial_conv_size + time_idx = np.repeat(np.arange(t_eff), gh * gw) + h_idx = np.tile(np.repeat(np.arange(gh), gw), t_eff) + w_idx = np.tile(np.arange(gw), t_eff * gh) + + coords = list(zip(time_idx, h_idx, w_idx)) + return [[start_idx + ti, start_idx + hi, start_idx + wi] for ti, hi, wi in coords] + + def _load_tokenizer(self): + """ + load tokenizer - return frames, meta + Returns: + tokenizer (AutoTokenizer) + """ + vocab_file_names = [ + "tokenizer.model", + "spm.model", + "ernie_token_100k.model", + ] + for i in range(len(vocab_file_names)): + if os.path.exists(os.path.join(self.model_name_or_path, 
vocab_file_names[i])): + ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] + break + self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) def apply_chat_template(self, request): """ @@ -321,13 +495,15 @@ def apply_chat_template(self, request): if self.tokenizer.chat_template is None: raise ValueError("This model does not support chat_template.") - prompt_token_str = self.tokenizer.apply_chat_template( - request["messages"], - tokenize=False, - add_generation_prompt=request.get("add_generation_prompt", True), + prompt_token_str = ( + self.tokenizer.apply_chat_template( + request, + tokenize=False, + add_generation_prompt=request.get("add_generation_prompt", True), + ) + .replace("<|image@placeholder|>", "") + .replace("<|video@placeholder|>", "") ) - prompt_token_str = prompt_token_str.replace(self.image_token, "").replace(self.video_token, "") - tokens = self.tokenizer.tokenize(prompt_token_str) token_ids = self.tokenizer.convert_tokens_to_ids(tokens) data_processor_logger.info( diff --git a/fastdeploy/input/mm_processor/process_video.py b/fastdeploy/input/mm_processor/process_video.py index 9dd79cf97f..91120096c7 100644 --- a/fastdeploy/input/mm_processor/process_video.py +++ b/fastdeploy/input/mm_processor/process_video.py @@ -16,135 +16,190 @@ import io import os -import decord -from tempfile import NamedTemporaryFile as ntf -from typing import Union, Optional -import numpy as np -import math +import random -try: - # moviepy 1.0 - import moviepy.editor as mp -except: - # moviepy 2.0 - import moviepy as mp +import numpy as np +from PIL import Image from fastdeploy.utils import data_processor_logger +from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename +from .utils.video_utils import VideoReaderWrapper -def is_gif(data: bytes) -> bool: - """ - check if a bytes is a gif based on the magic head - """ - return data[:6] in (b"GIF87a", b"GIF89a") - - -class VideoReaderWrapper(decord.VideoReader): - """ - Solving memory leak bug - - https://github.com/dmlc/decord/issues/208 - """ - def __init__(self, video_path, *args, **kwargs): - with ntf(delete=True, suffix=".gif") as gif_file: - gif_input = None - self.original_file = None - if isinstance(video_path, str): - self.original_file = video_path - if video_path.lower().endswith(".gif"): - gif_input = video_path - elif isinstance(video_path, bytes): - if is_gif(video_path): - gif_file.write(video_path) - gif_input = gif_file.name - elif isinstance(video_path, io.BytesIO): - video_path.seek(0) - tmp_bytes = video_path.read() - video_path.seek(0) - if is_gif(tmp_bytes): - gif_file.write(tmp_bytes) - gif_input = gif_file.name - - if gif_input is not None: - clip = mp.VideoFileClip(gif_input) - mp4_file = ntf(delete=False, suffix=".mp4") - clip.write_videofile(mp4_file.name, verbose=False, logger=None) - clip.close() - video_path = mp4_file.name - self.original_file = video_path - - super().__init__(video_path, *args, **kwargs) - self.seek(0) - - def __getitem__(self, key): - frames = super().__getitem__(key) - self.seek(0) - return frames - - def __del__(self): - if self.original_file and os.path.exists(self.original_file): - os.remove(self.original_file) - - -def read_video_decord(video_path): +def read_video_decord(video_path, save_to_disk): """get reader and meta by decord""" + # video_path = get_downloadable(video_path, save_to_disk=save_to_disk) if isinstance(video_path, VideoReaderWrapper): video_reader = video_path else: if isinstance(video_path, bytes): video_path = 
io.BytesIO(video_path) video_reader = VideoReaderWrapper(video_path, num_threads=1) - vlen = len(video_reader) fps = video_reader.get_avg_fps() duration = vlen / float(fps) video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} - return video_reader, video_meta + return video_reader, video_meta, video_path -def sample_frames( - video: np.ndarray, - frame_factor: int, - min_frames: int, - max_frames: int, - metadata: Optional[dict] = None, - fps: Optional[Union[int, float]] = None, - num_frames: Optional[int] = None, + +def get_frame_indices( + vlen, + target_frames=-1, + target_fps=-1, + frames_sample="middle", + fix_start=None, + input_fps=-1, ): - if fps is not None and num_frames is not None: - raise ValueError("`num_frames` and `fps` are mutually exclusive arguments, please use only one!") - - if fps is None and num_frames is None: - return video - - total_num_frames = video.shape[0] - - # If num_frames is not given but fps is, calculate num_frames from fps - if num_frames is not None: - num_frames = round(num_frames / frame_factor) * frame_factor - elif fps is not None: - if metadata is None: - raise ValueError( - "Asked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. " - "Please pass in `VideoMetadata` object or use a fixed `num_frames` per input video" + """ + 取出对应的frame index + """ + assert frames_sample in ["rand", "middle", "leading"] + if target_frames > 0: + assert target_fps <= 0, "target_fps must be negative if target_frames is given." + if target_frames > vlen: + acc_samples = vlen + data_processor_logger.info( + f"target_frames={target_frames} is larger than video length {vlen}, " + f"will sample {acc_samples} frames." ) - max_frames = math.floor(min(max_frames, total_num_frames) / frame_factor) * frame_factor - num_frames = total_num_frames / metadata["fps"] * fps - num_frames = min(min(max(num_frames, min_frames), max_frames), total_num_frames) - num_frames = math.floor(num_frames / frame_factor) * frame_factor - - if num_frames > total_num_frames: - raise ValueError( - f"Video can't be sampled. The inferred `num_frames={num_frames}` exceeds `total_num_frames={total_num_frames}`. " - "Decrease `num_frames` or `fps` for sampling." - ) + else: + acc_samples = target_frames + data_processor_logger.debug(f"sampling at target_frames={target_frames}, frames_sample={frames_sample}") + + # split the video into `acc_samples` intervals, and sample from each interval. + intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int) + ranges = [] + for idx, interv in enumerate(intervals[:-1]): + ranges.append((interv, intervals[idx + 1] - 1)) + if frames_sample == "rand": + try: + frame_indices = [random.choice(range(x[0], x[1])) for x in ranges] + except Exception: + frame_indices = np.random.permutation(vlen)[:acc_samples] + frame_indices.sort() + frame_indices = list(frame_indices) + elif fix_start is not None: + frame_indices = [x[0] + fix_start for x in ranges] + elif frames_sample == "leading": + frame_indices = [x[0] for x in ranges] + elif frames_sample == "middle": + frame_indices = [(x[0] + x[1]) // 2 for x in ranges] + else: + raise NotImplementedError + + elif target_fps > 0: + assert target_frames <= 0, "target_frames must be negative if target_fps is given." + assert input_fps > 0, "input_fps must be provided if target_fps is given." 
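# --- [Editor's note: worked example with made-up numbers, not part of this patch.
# It mirrors the frames_sample="middle" branch below: one frame is taken from the
# middle of every 1/target_fps second window of the clip.]
import numpy as np

input_fps_ex, vlen_ex, target_fps_ex = 24, 300, 2
duration_ex = vlen_ex / input_fps_ex                      # 12.5 s of video
delta_ex = 1 / target_fps_ex                              # 0.5 s per sampled frame
frame_seconds_ex = np.arange(delta_ex / 2, duration_ex + delta_ex / 2, delta_ex)
frame_indices_ex = np.around(frame_seconds_ex * input_fps_ex).astype(int)
frame_indices_ex = [i for i in frame_indices_ex if i < vlen_ex]
assert frame_indices_ex[:3] == [6, 18, 30]                # frames at 0.25 s, 0.75 s, 1.25 s
# --- [End editor's note] ---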
+ data_processor_logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}") + duration = float(vlen) / input_fps + delta = 1 / target_fps # gap between frames, this is also the clip length each frame represents + if frames_sample == "middle": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + elif frames_sample == "leading": + frame_seconds = np.arange(0, duration, delta) + if frames_sample == "rand": + frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) + rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5 + frame_seconds += rand_offset * delta + frame_indices = np.around(frame_seconds * input_fps).astype(int) + frame_indices = [e for e in frame_indices if e < vlen] - if num_frames is not None: - indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype("int") else: - indices = np.arange(0, total_num_frames).astype("int") - video = video[indices] + raise ValueError("Must provide either positive target_fps or positive target_frames.") + + return frame_indices + + +def read_frames_decord( + video_path, + video_reader, + video_meta, + target_frames=-1, + target_fps=-1, + frames_sample="middle", + fix_start=None, + save_to_disk=False, + cache_dir=EXTRACTED_FRAME_DIR, + frame_indices=None, + tol=10, +): + """get frames by decord""" + + if frame_indices is None: + frame_indices = get_frame_indices( + video_meta["num_of_frame"], + target_frames=target_frames, + target_fps=target_fps, + frames_sample=frames_sample, + fix_start=fix_start, + input_fps=video_meta["fps"], + ) - return video + frames = [] + for frame_indice_index in range(0, len(frame_indices)): + frame_indice = frame_indices[frame_indice_index] + try: + frames.append(video_reader[frame_indice].asnumpy()) # (T, H, W, C) + except Exception as e: + data_processor_logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}") + previous_counter = 1 + later_counter = 1 + previous_after_flag = True + if frame_indice == 0 or frame_indice == len(video_reader) - 1: + cur_tol = tol * 2 + else: + cur_tol = tol + while previous_counter < cur_tol or later_counter < cur_tol: + if previous_after_flag: + if frame_indice - previous_counter < 0: + previous_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append(video_reader[frame_indice - previous_counter].asnumpy()) + data_processor_logger.info( + f"replace {frame_indice}-th frame with {frame_indice-previous_counter}-th frame" + ) + frame_indices[frame_indice_index] = frame_indice - previous_counter + break + except Exception as e: + previous_counter += 1 + data_processor_logger.info(f"error: {e}") + else: + if frame_indice + later_counter >= len(video_reader): + later_counter += 1 + previous_after_flag = not previous_after_flag + continue + try: + frames.append(video_reader[frame_indice + later_counter].asnumpy()) + data_processor_logger.info( + f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame" + ) + frame_indices[frame_indice_index] = frame_indice + later_counter + break + except Exception: + later_counter += 1 + previous_after_flag = not previous_after_flag + + frames = np.stack(frames, axis=0) + assert len(frames) == len(frame_indices), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}" + + ret = [] + + url_sha1 = get_filename() + for idx, frame in enumerate(frames): + tmp = Image.fromarray(frame, "RGB") + if save_to_disk: + save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png") + if not 
os.path.exists(os.path.dirname(save_path)): + os.makedirs(os.path.dirname(save_path)) + tmp.save(save_path) + tmp = save_path + ret.append(tmp) + + time_stamps = [frame_idx * video_meta["duration"] / video_meta["num_of_frame"] for frame_idx in frame_indices] + + return ret, frame_indices, time_stamps diff --git a/fastdeploy/input2/mm_processor/tokenizer/__init__.py b/fastdeploy/input/mm_processor/tokenizer/__init__.py similarity index 100% rename from fastdeploy/input2/mm_processor/tokenizer/__init__.py rename to fastdeploy/input/mm_processor/tokenizer/__init__.py diff --git a/fastdeploy/input2/mm_processor/tokenizer/tokenizer_vl.py b/fastdeploy/input/mm_processor/tokenizer/tokenizer_vl.py similarity index 100% rename from fastdeploy/input2/mm_processor/tokenizer/tokenizer_vl.py rename to fastdeploy/input/mm_processor/tokenizer/tokenizer_vl.py diff --git a/fastdeploy/input2/mm_processor/utils/Roboto-Regular.ttf b/fastdeploy/input/mm_processor/utils/Roboto-Regular.ttf similarity index 100% rename from fastdeploy/input2/mm_processor/utils/Roboto-Regular.ttf rename to fastdeploy/input/mm_processor/utils/Roboto-Regular.ttf diff --git a/fastdeploy/input2/mm_processor/utils/__init__.py b/fastdeploy/input/mm_processor/utils/__init__.py similarity index 100% rename from fastdeploy/input2/mm_processor/utils/__init__.py rename to fastdeploy/input/mm_processor/utils/__init__.py diff --git a/fastdeploy/input2/mm_processor/utils/io_utils.py b/fastdeploy/input/mm_processor/utils/io_utils.py similarity index 100% rename from fastdeploy/input2/mm_processor/utils/io_utils.py rename to fastdeploy/input/mm_processor/utils/io_utils.py diff --git a/fastdeploy/input2/mm_processor/utils/render_timestamp.py b/fastdeploy/input/mm_processor/utils/render_timestamp.py similarity index 100% rename from fastdeploy/input2/mm_processor/utils/render_timestamp.py rename to fastdeploy/input/mm_processor/utils/render_timestamp.py diff --git a/fastdeploy/input2/mm_processor/utils/video_utils.py b/fastdeploy/input/mm_processor/utils/video_utils.py similarity index 100% rename from fastdeploy/input2/mm_processor/utils/video_utils.py rename to fastdeploy/input/mm_processor/utils/video_utils.py diff --git a/fastdeploy/input/preprocess.py b/fastdeploy/input/preprocess.py index e2ef518d23..ffef83c430 100644 --- a/fastdeploy/input/preprocess.py +++ b/fastdeploy/input/preprocess.py @@ -70,8 +70,10 @@ def create_processor(self): reasoning_parser_obj = None if self.reasoning_parser: reasoning_parser_obj = ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) - model_conf = ModelConfig({"model": self.model_name_or_path}) - architectures = model_conf.architectures[0] + + config = ModelConfig({"model": self.model_name_or_path}) + architectures = config.architectures[0] + if not self.enable_mm: if not ErnieArchitectures.contains_ernie_arch(architectures): from fastdeploy.input.text_processor import DataProcessor @@ -88,9 +90,8 @@ def create_processor(self): reasoning_parser_obj=reasoning_parser_obj, ) else: - if not ErnieArchitectures.contains_ernie_arch(architectures): - raise ValueError(f"Model {self.model_name_or_path} is not a valid Ernie4_5_VL model.") - else: + if ErnieArchitectures.contains_ernie_arch(architectures): + # raise ValueError(f"Model {self.model_name_or_path} is not a valid Ernie4_5_VL model.") from fastdeploy.input.ernie_vl_processor import ErnieMoEVLProcessor self.processor = ErnieMoEVLProcessor( @@ -99,4 +100,14 @@ def create_processor(self): mm_processor_kwargs=self.mm_processor_kwargs, 
reasoning_parser_obj=reasoning_parser_obj, ) + else: + from fastdeploy.input.qwen_vl_processor import QwenVLProcessor + + self.processor = QwenVLProcessor( + config=config, + model_name_or_path=self.model_name_or_path, + limit_mm_per_prompt=self.limit_mm_per_prompt, + mm_processor_kwargs=self.mm_processor_kwargs, + reasoning_parser_obj=reasoning_parser_obj, + ) return self.processor diff --git a/fastdeploy/input2/mm_processor/__init__.py b/fastdeploy/input/qwen_mm_processor/__init__.py similarity index 89% rename from fastdeploy/input2/mm_processor/__init__.py rename to fastdeploy/input/qwen_mm_processor/__init__.py index ba59bc1654..7cc194dd68 100644 --- a/fastdeploy/input2/mm_processor/__init__.py +++ b/fastdeploy/input/qwen_mm_processor/__init__.py @@ -14,10 +14,9 @@ # limitations under the License. """ -from .process import IDS_TYPE_FLAG, DataProcessor, fancy_print +from .process import DataProcessor, IDS_TYPE_FLAG __all__ = [ "DataProcessor", - "fancy_print", "IDS_TYPE_FLAG", ] diff --git a/fastdeploy/input/mm_processor/image_processor.py b/fastdeploy/input/qwen_mm_processor/image_processor.py similarity index 100% rename from fastdeploy/input/mm_processor/image_processor.py rename to fastdeploy/input/qwen_mm_processor/image_processor.py diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py new file mode 100644 index 0000000000..7909ae2105 --- /dev/null +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -0,0 +1,340 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +""" process.py """ +from typing import Any, Dict, List, Union +import numpy as np +from PIL import Image +from paddleformers.transformers import AutoTokenizer +from fastdeploy.entrypoints.chat_utils import parse_chat_messages +from fastdeploy.input.mm_processor import IDS_TYPE_FLAG +from fastdeploy.utils import data_processor_logger + +from .image_processor import ImageProcessor +from .process_video import read_video_decord, sample_frames + + +class DataProcessor: + """ + Processes multimodal chat messages into model-ready inputs, + handling text, images, and videos with 3D positional embeddings. 
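+
+    A minimal usage sketch (the checkpoint path and the `pil_image` object below
+    are illustrative placeholders, not part of this module):
+
+        processor = DataProcessor(model_path="/path/to/Qwen2.5-VL")
+        outputs = processor.text2ids(
+            "Describe the image. <|image@placeholder|>", images=[pil_image]
+        )
+        # outputs["input_ids"], outputs["position_ids"], outputs["images"], ...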
+    """
+
+    def __init__(
+        self,
+        model_path: str,
+        video_min_frames: int = 4,
+        video_max_frames: int = 768,
+        tokens_per_second: int = 2,
+        **kwargs,
+    ) -> None:
+        self.min_frames = video_min_frames
+        self.max_frames = video_max_frames
+
+        # Tokenizer and image preprocessor
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True)
+        self.tokenizer.ignored_index = -100
+        self.image_processor = ImageProcessor.from_pretrained(model_path)
+
+        # Convolution sizes for patch aggregation
+        self.spatial_conv_size = self.image_processor.merge_size
+        self.temporal_conv_size = self.image_processor.temporal_patch_size
+
+        # Special tokens and IDs
+        self.image_token = "<|image_pad|>"
+        self.video_token = "<|video_pad|>"
+
+        self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token)
+        self.video_token_id = self.tokenizer.convert_tokens_to_ids(self.video_token)
+
+        self.vision_start = "<|vision_start|>"
+        self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start)
+
+        self.tokens_per_second = tokens_per_second
+
+        self.role_prefixes = {
+            "system": "",
+            "user": "User: ",
+            "bot": "Assistant: ",
+            "assistant": "Assistant: ",
+        }
+
+    def text2ids(self, text, images=None, videos=None):
+        """
+        Convert chat text into model inputs.
+        Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels.
+        """
+
+        outputs = {
+            "input_ids": [],
+            "token_type_ids": [],
+            "position_ids": [],
+            "images": [],
+            "grid_thw": [],
+            "image_type_ids": [],
+            "labels": [],
+            "cur_position": 0,
+            "pic_cnt": 0,
+            "video_cnt": 0,
+        }
+
+        IMAGE_PLACEHOLDER = "<|image@placeholder|>"
+        VIDEO_PLACEHOLDER = "<|video@placeholder|>"
+        IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER)
+        VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER)
+        st, image_idx, video_idx = 0, 0, 0
+        while st < len(text):
+            image_pos = text.find(IMAGE_PLACEHOLDER, st)
+            image_pos = len(text) if image_pos == -1 else image_pos
+            video_pos = text.find(VIDEO_PLACEHOLDER, st)
+            video_pos = len(text) if video_pos == -1 else video_pos
+            ed = min(image_pos, video_pos)
+
+            self._add_text(text[st:ed], outputs)
+            if ed == len(text):
+                break
+
+            if ed == image_pos:
+                self._add_image(images[image_idx], outputs)
+                image_idx += 1
+                st = ed + IMAGE_PLACEHOLDER_LEN
+            else:
+                item = videos[video_idx]
+                # _load_and_process_video returns (frames, metadata); the metadata
+                # carries the effective fps needed for temporal position ids.
+                if isinstance(item, dict):
+                    frames, meta = self._load_and_process_video(item["video"], item)
+                else:
+                    frames, meta = self._load_and_process_video(item, {})
+
+                self._add_video(frames, meta, outputs)
+                video_idx += 1
+                st = ed + VIDEO_PLACEHOLDER_LEN
+
+        return outputs
+
+    def request2ids(
+        self, request: Dict[str, Any], tgts: List[str] = None
+    ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]:
+        """
+        Convert chat messages into model inputs.
+        Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels.
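+
+        Vision items are consumed in the normalized form produced by
+        parse_chat_messages; a rough sketch of what this method expects
+        (the exact payload types come from the parser, not from this class):
+
+            {"type": "image", "image": <PIL.Image.Image>}
+            {"type": "video", "video": <bytes or path>, "fps": ..., "target_frames": ...}
+
+        Each <|vision_start|> token in the templated prompt consumes the next
+        vision item in order.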
+        """
+
+        outputs = {
+            "input_ids": [],
+            "token_type_ids": [],
+            "position_ids": [],
+            "images": [],
+            "grid_thw": [],
+            "image_type_ids": [],
+            "labels": [],
+            "cur_position": 0,
+            "pic_cnt": 0,
+            "video_cnt": 0,
+        }
+
+        messages = parse_chat_messages(request.get("messages"))
+        image_message_list = []
+        for msg in messages:
+            role = msg.get("role")
+            assert role in self.role_prefixes, f"Unsupported role: {role}"
+            content_items = msg.get("content")
+            if not isinstance(content_items, list):
+                content_items = [content_items]
+            for item in content_items:
+                if isinstance(item, dict) and item.get("type") in [
+                    "image",
+                    "video",
+                ]:
+                    image_message_list.append(item)
+        request["messages"] = messages
+
+        prompt_token_ids = self.apply_chat_template(request)
+        if len(prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
+
+        vision_start_index = 0
+        vision_message_index = 0
+        for i in range(len(prompt_token_ids)):
+            if prompt_token_ids[i] == self.vision_start_id:
+                self._add_text(prompt_token_ids[vision_start_index : i + 1], outputs)
+
+                vision_start_index = i + 1
+                image_message = image_message_list[vision_message_index]
+
+                if image_message["type"] == "image":
+                    img = image_message.get("image")
+                    if img is None:
+                        continue
+                    outputs["pic_cnt"] += 1
+                    self._add_image(img, outputs)
+
+                elif image_message["type"] == "video":
+                    video_bytes = image_message.get("video")
+                    if video_bytes is None:
+                        continue
+                    frames, meta = self._load_and_process_video(video_bytes, image_message)
+
+                    outputs["video_cnt"] += 1
+                    self._add_video(frames, meta, outputs)
+
+                vision_message_index += 1
+
+        self._add_text(prompt_token_ids[vision_start_index:], outputs)
+        return outputs
+
+    def _add_text(self, tokens, outputs: Dict) -> None:
+        if isinstance(tokens, str):
+            tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"]
+
+        outputs["input_ids"].extend(tokens)
+        outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens))
+
+        position_ids = self._compute_text_positions(outputs["cur_position"], len(tokens))
+        outputs["position_ids"].append(position_ids)
+        outputs["cur_position"] = position_ids.max() + 1
+
+    def _compute_text_positions(self, start_pos: int, num_tokens: int) -> np.ndarray:
+        text_array = np.arange(num_tokens).reshape(1, -1)
+        text_index = np.broadcast_to(text_array, (3, num_tokens))
+        position = text_index + start_pos
+        return position
+
+    def _add_image(self, img, outputs: Dict) -> None:
+        ret = self.image_processor.preprocess(images=[img.convert("RGB")])
+        num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2
+        grid_thw = ret["grid_thw"].tolist()
+
+        outputs["input_ids"].extend([self.image_token_id] * num_tokens)
+        outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens)
+
+        outputs["images"].append(ret["pixel_values"])
+        outputs["grid_thw"].append(grid_thw)
+        outputs["image_type_ids"].append(0)
+
+        t, h, w = grid_thw
+        position_ids = self._compute_vision_positions(outputs["cur_position"], t, h, w, 0)
+
+
outputs["position_ids"].append(position_ids) + outputs["cur_position"] = position_ids.max() + 1 + + def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: + ret = self.image_processor.preprocess(images=frames) + + num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 + grid_thw = ret["grid_thw"].tolist() + + outputs["input_ids"].extend([self.video_token_id] * num_tokens) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) + + outputs["images"].append(ret["pixel_values"]) + outputs["grid_thw"].append(grid_thw) + outputs["image_type_ids"].extend([1] * grid_thw[0]) + + fps = meta["fps"] + second_per_grid_t = self.temporal_conv_size / fps + t, h, w = grid_thw + position_ids = self._compute_vision_positions(outputs["cur_position"], t,h,w, second_per_grid_t) + + outputs["position_ids"].append(position_ids) + outputs["cur_position"] = position_ids.max() + 1 + + def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: + h //= self.spatial_conv_size + w //= self.spatial_conv_size + + tn = np.arange(t).reshape(-1, 1) + tn = np.broadcast_to(tn, (t, h * w)) + tn = tn * second_per_grid_t * self.tokens_per_second + t_index = tn.flatten() + + hn = np.arange(h).reshape(1, -1, 1) + h_index = np.broadcast_to(hn, (t, h, w)).flatten() + + wn = np.arange(w).reshape(1, 1, -1) + w_index = np.broadcast_to(wn, (t, h, w)).flatten() + + position = np.stack([t_index, h_index, w_index]) + start_pos + return position + + def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: + reader, meta = read_video_decord(url) + + frames = [] + for i in range(meta["num_of_frame"]): + frame = reader[i].asnumpy() + image = Image.fromarray(frame, "RGB") + frames.append(image) + frames = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + + fps = item.get("fps", None) + num_frames = item.get("target_frames", None) + if fps is not None or num_frames is not None: + min_frames = item.get("min_frames", self.min_frames) + max_frames = item.get("max_frames", self.max_frames) + frames = sample_frames(video=frames, + frame_factor=self.temporal_conv_size, + min_frames=min_frames, + max_frames=max_frames, + metadata=meta, + fps=fps, + num_frames=num_frames) + + meta["num_of_frame"] = frames.shape[0] + if fps is not None: + meta["fps"] = fps + else: + meta["fps"] = frames.shape[0] / meta["duration"] + + return frames, meta + + def apply_chat_template(self, request): + """ + Convert multi-turn messages into ID sequences. 
+ + Args: + messages: Either a request dict containing 'messages' field, + or a list of message dicts directly + + Returns: + List of token IDs as strings (converted from token objects) + """ + if self.tokenizer.chat_template is None: + raise ValueError("This model does not support chat_template.") + + prompt_token_str = self.tokenizer.apply_chat_template( + request["messages"], + tokenize=False, + add_generation_prompt=request.get("add_generation_prompt", True), + ) + prompt_token_str = prompt_token_str.replace(self.image_token, "").replace(self.video_token, "") + + tokens = self.tokenizer.tokenize(prompt_token_str) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + data_processor_logger.info( + f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" + ) + return token_ids diff --git a/fastdeploy/input/qwen_mm_processor/process_video.py b/fastdeploy/input/qwen_mm_processor/process_video.py new file mode 100644 index 0000000000..9dd79cf97f --- /dev/null +++ b/fastdeploy/input/qwen_mm_processor/process_video.py @@ -0,0 +1,150 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import io +import os +import decord +from tempfile import NamedTemporaryFile as ntf +from typing import Union, Optional +import numpy as np +import math + +try: + # moviepy 1.0 + import moviepy.editor as mp +except: + # moviepy 2.0 + import moviepy as mp + +from fastdeploy.utils import data_processor_logger + + +def is_gif(data: bytes) -> bool: + """ + check if a bytes is a gif based on the magic head + """ + return data[:6] in (b"GIF87a", b"GIF89a") + + +class VideoReaderWrapper(decord.VideoReader): + """ + Solving memory leak bug + + https://github.com/dmlc/decord/issues/208 + """ + + def __init__(self, video_path, *args, **kwargs): + with ntf(delete=True, suffix=".gif") as gif_file: + gif_input = None + self.original_file = None + if isinstance(video_path, str): + self.original_file = video_path + if video_path.lower().endswith(".gif"): + gif_input = video_path + elif isinstance(video_path, bytes): + if is_gif(video_path): + gif_file.write(video_path) + gif_input = gif_file.name + elif isinstance(video_path, io.BytesIO): + video_path.seek(0) + tmp_bytes = video_path.read() + video_path.seek(0) + if is_gif(tmp_bytes): + gif_file.write(tmp_bytes) + gif_input = gif_file.name + + if gif_input is not None: + clip = mp.VideoFileClip(gif_input) + mp4_file = ntf(delete=False, suffix=".mp4") + clip.write_videofile(mp4_file.name, verbose=False, logger=None) + clip.close() + video_path = mp4_file.name + self.original_file = video_path + + super().__init__(video_path, *args, **kwargs) + self.seek(0) + + def __getitem__(self, key): + frames = super().__getitem__(key) + self.seek(0) + return frames + + def __del__(self): + if self.original_file and os.path.exists(self.original_file): + os.remove(self.original_file) + + +def read_video_decord(video_path): + """get reader and meta by decord""" + if isinstance(video_path, 
VideoReaderWrapper): + video_reader = video_path + else: + if isinstance(video_path, bytes): + video_path = io.BytesIO(video_path) + video_reader = VideoReaderWrapper(video_path, num_threads=1) + + vlen = len(video_reader) + fps = video_reader.get_avg_fps() + duration = vlen / float(fps) + + video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} + return video_reader, video_meta + + +def sample_frames( + video: np.ndarray, + frame_factor: int, + min_frames: int, + max_frames: int, + metadata: Optional[dict] = None, + fps: Optional[Union[int, float]] = None, + num_frames: Optional[int] = None, +): + if fps is not None and num_frames is not None: + raise ValueError("`num_frames` and `fps` are mutually exclusive arguments, please use only one!") + + if fps is None and num_frames is None: + return video + + total_num_frames = video.shape[0] + + # If num_frames is not given but fps is, calculate num_frames from fps + if num_frames is not None: + num_frames = round(num_frames / frame_factor) * frame_factor + elif fps is not None: + if metadata is None: + raise ValueError( + "Asked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. " + "Please pass in `VideoMetadata` object or use a fixed `num_frames` per input video" + ) + max_frames = math.floor(min(max_frames, total_num_frames) / frame_factor) * frame_factor + num_frames = total_num_frames / metadata["fps"] * fps + num_frames = min(min(max(num_frames, min_frames), max_frames), total_num_frames) + num_frames = math.floor(num_frames / frame_factor) * frame_factor + + if num_frames > total_num_frames: + raise ValueError( + f"Video can't be sampled. The inferred `num_frames={num_frames}` exceeds `total_num_frames={total_num_frames}`. " + "Decrease `num_frames` or `fps` for sampling." 
+ ) + + if num_frames is not None: + indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype("int") + else: + indices = np.arange(0, total_num_frames).astype("int") + video = video[indices] + + return video diff --git a/fastdeploy/input2/ernie_vl_processor.py b/fastdeploy/input/qwen_vl_processor.py similarity index 90% rename from fastdeploy/input2/ernie_vl_processor.py rename to fastdeploy/input/qwen_vl_processor.py index 63ae5bc310..25a2b4682a 100644 --- a/fastdeploy/input2/ernie_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor.py @@ -19,36 +19,31 @@ from fastdeploy.engine.request import Request from fastdeploy.input.ernie_processor import ErnieProcessor -from fastdeploy.input.mm_processor import IDS_TYPE_FLAG, DataProcessor +from fastdeploy.input.qwen_mm_processor import IDS_TYPE_FLAG, DataProcessor from fastdeploy.utils import data_processor_logger -class ErnieMoEVLProcessor(ErnieProcessor): +class QwenVLProcessor(ErnieProcessor): """The processor class for ERNIE MoE VL models.""" def __init__( self, + config, model_name_or_path, limit_mm_per_prompt=None, mm_processor_kwargs=None, reasoning_parser_obj=None, ): data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") - tokenizer_path = model_name_or_path - preprocessor_path = model_name_or_path processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) self.ernie_processor = DataProcessor( - tokenizer_name=tokenizer_path, - image_preprocessor_name=preprocessor_path, + model_path=model_name_or_path, + tokens_per_second=config.vision_config.tokens_per_second, **processor_kwargs, ) - self.ernie_processor.eval() - self.image_patch_id = self.ernie_processor.image_patch_id - self.spatial_conv_size = self.ernie_processor.spatial_conv_size - - self.decode_status = dict() self._load_tokenizer() + self.decode_status = dict() # Generation config try: @@ -59,7 +54,6 @@ def __init__( ) self.generation_config = None - # self.eos_token_ids = [self.tokenizer.eos_token_id] from paddleformers.trl.llm_utils import get_eos_token_id self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) @@ -107,11 +101,10 @@ def set_value(req, key, value): def process_request(self, request, max_model_len=None, **kwargs): """process the input data""" task = request.to_dict() - task["enable_thinking"] = kwargs.get("enable_thinking", True) + task["enable_thinking"] = kwargs.get("enable_thinking", False) self.process_request_dict(task, max_model_len) request = Request.from_dict(task) request = self._apply_default_parameters(request) - return request def _parse_processor_kwargs(self, kwargs): @@ -126,17 +119,8 @@ def _parse_processor_kwargs(self, kwargs): # 验证参数类型 data_processor_logger.info(f"kwargs:{kwargs}") expected_types = { - "spatial_conv_size": int, - "temporal_conv_size": int, - "image_min_pixels": int, - "image_max_pixels": int, - "video_min_pixels": int, - "video_max_pixels": int, - "video_target_frames": int, - "video_frames_sample": str, "video_max_frames": int, "video_min_frames": int, - "video_fps": int, } for key, value in kwargs.items(): @@ -258,12 +242,13 @@ def pack_outputs(self, outs): outs["grid_thw"] = np.vstack(outs["grid_thw"]) outs["image_type_ids"] = np.array(outs["image_type_ids"]) - outs["image_patch_id"] = self.image_patch_id + outs["image_patch_id"] = self.ernie_processor.image_token_id + outs["video_patch_id"] = self.ernie_processor.video_token_id + # Convert lists to arrays outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) outs["token_type_ids"] = 
np.array(outs["token_type_ids"], dtype=np.int64) - outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64) - + outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) return outs def process_response_dict(self, response_dict, stream, **kwargs): @@ -283,3 +268,17 @@ def process_response_dict(self, response_dict, stream, **kwargs): return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) else: return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) + + def update_stop_seq(self, stop_sequences): + """ + Update stop sequences from request. + """ + stop_seqs = [] + if isinstance(stop_sequences, str): + stop_sequences = [stop_sequences] + for seq in stop_sequences: + if seq != self.tokenizer.eos_token_id: + stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) + stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) + data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") + return stop_seqs, stop_seqs_len diff --git a/fastdeploy/input2/__init__.py b/fastdeploy/input2/__init__.py deleted file mode 100644 index f4ede90624..0000000000 --- a/fastdeploy/input2/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" diff --git a/fastdeploy/input2/ernie_processor.py b/fastdeploy/input2/ernie_processor.py deleted file mode 100644 index 28d91bdbf8..0000000000 --- a/fastdeploy/input2/ernie_processor.py +++ /dev/null @@ -1,425 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" - -import os - -import numpy as np -from paddleformers.generation import GenerationConfig - -from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer -from fastdeploy.input.text_processor import BaseDataProcessor -from fastdeploy.utils import data_processor_logger - -_SAMPLING_EPS = 1e-5 - - -class ErnieProcessor(BaseDataProcessor): - """ - 初始化模型实例。 - - Args: - model_name_or_path (str): 模型名称或路径。 - - Attributes: - model_name_or_path (str): 存储模型名称或路径。 - decode_status (dict): 存储解码状态信息。 - tokenizer (object): 存储分词器实例。 - eos_token_ids (list): 存储结束符号的token ID列表。 - eos_token_id_len (int): 存储结束符号的token ID列表的长度。 - pad_token_id (int): 存储填充符号的token ID。 - """ - - def __init__(self, model_name_or_path, reasoning_parser_obj=None): - - self.model_name_or_path = model_name_or_path - data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") - - # Generation config - try: - self.generation_config = GenerationConfig.from_pretrained(self.model_name_or_path) - except Exception as e: - data_processor_logger.warning( - f"Can't find generation config, so it will not use " - f"generation_config field in the model config, details={e}" - ) - self.generation_config = None - - self.decode_status = dict() - self.thinking_parser_dict = dict() - self._load_tokenizer() - data_processor_logger.info( - f"tokenizer information: bos_token is {self.tokenizer.bos_token} \ - {self.tokenizer.bos_token_id}, \ - eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id} " - ) - from paddleformers.trl.llm_utils import get_eos_token_id - - self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) - self.eos_token_id_len = len(self.eos_token_ids) - self.pad_token_id = self.get_pad_id() - self.reasoning_parser = None - if reasoning_parser_obj: - self.reasoning_parser = reasoning_parser_obj(self.tokenizer) - - def process_request(self, request, max_model_len=None, **kwargs): - """ - Preprocess the request - - Args: - request (Dict): may contain text and messages fields - - Returns: - bool: Whether preprocessing is successful - str: error message - """ - request = self._apply_default_parameters(request) - if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0: - request.eos_token_ids = self.eos_token_ids - stop_sequences = request.get("stop", []) - if stop_sequences is not None and len(stop_sequences) != 0: - stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) - request.set("stop_token_ids", stop_seqs) - request.set("stop_seqs_len", stop_seqs_len) - - if request.prompt_token_ids is None or len(request.prompt_token_ids) == 0: - if request.prompt is None and request.messages is None: - raise ValueError(f"The request should have `prompt_token_ids`, `prompt` or `messages`: {request}.") - if request.prompt is not None: - prompt = request.prompt if request.prompt is not None else request.messages[0] - prompt = prompt[0] if isinstance(prompt, list) else prompt - tokens = self.tokenizer.tokenize(prompt) - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - request.prompt_token_ids = token_ids - data_processor_logger.info(f"req_id:{request.request_id}, tokens:{tokens}, token_ids: {token_ids}") - else: - request.prompt_token_ids = self.messages2ids(request.to_dict()) - - if len(request.prompt_token_ids) == 0: - raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - if max_model_len is not None and len(request.prompt_token_ids) > max_model_len: - request.prompt_token_ids = request.prompt_token_ids[: max_model_len - 
1] - if request.get("max_tokens") is None: - request.set( - "max_tokens", - max(1, max_model_len - len(request.prompt_token_ids)), - ) - if request.get("temperature") < _SAMPLING_EPS: - # zero temperature is equivalent to greedy sampling - request.set("temperature", 1) - if request.get("top_p") < _SAMPLING_EPS: - request.set("top_p", _SAMPLING_EPS) - data_processor_logger.info(f"Processed request {request}") - return request - - def process_request_dict(self, request, max_model_len=None): - """ - Preprocess the request - - Args: - request (Dict): may contain text and messages fields - - Returns: - bool: Whether preprocessing is successful - str: error message - """ - request = self._apply_default_parameters(request) - if not request.get("eos_token_ids"): - request["eos_token_ids"] = self.eos_token_ids - - # processing stop_sequences - stop_sequences = request.get("stop", []) - if stop_sequences: - stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) - request["stop_token_ids"] = stop_seqs - request["stop_seqs_len"] = stop_seqs_len - - # processing prompt_token_ids - if not request.get("prompt_token_ids"): - if request.get("prompt") is None and request.get("messages") is None: - raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}") - if request.get("prompt"): - prompt = request.get("prompt") - prompt = prompt[0] if isinstance(prompt, list) else prompt - - tokens = self.tokenizer.tokenize(prompt) - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - request["prompt_token_ids"] = token_ids - req_id = request.get("request_id", None) - data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") - else: - request["prompt_token_ids"] = self.messages2ids(request) - if len(request["prompt_token_ids"]) == 0: - raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - - # truncate prompts that exceed the length limit - if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len: - request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] - if request.get("max_tokens") is None: - request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) - if request.get("temperature") < _SAMPLING_EPS: - # zero temperature is equivalent to greedy sampling - request["temperature"] = 1 - if request.get("top_p") < _SAMPLING_EPS: - request["top_p"] = _SAMPLING_EPS - data_processor_logger.info(f"Processed request {request}") - - return request - - def process_response(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - req_id = response_dict.request_id - token_ids = response_dict.outputs.token_ids - - response_dict.usage = {"completion_tokens": response_dict.outputs.index + 1} - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - full_text = self.tokenizer.decode(token_ids) - if self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) - response_dict.outputs.text = text - response_dict.outputs.reasoning_content = reasoning_content - else: - response_dict.outputs.text = full_text - data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}") - if response_dict.outputs.text == "" and response_dict.outputs.reasoning_content == "": - return None - return response_dict - - def 
process_response_dict(self, response_dict, stream, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - if stream: - return self.process_response_dict_streaming(response_dict, **kwargs) - else: - return self.process_response_dict_normal(response_dict, **kwargs) - - def process_response_dict_normal(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - enable_thinking = kwargs.get("enable_thinking") - token_ids = response_dict["outputs"]["token_ids"] - is_end = response_dict["finished"] - req_id = response_dict["request_id"] - if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id) - if is_end: - full_text = previous_texts + delta_text - if enable_thinking and self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) - response_dict["outputs"]["text"] = text - response_dict["outputs"]["reasoning_content"] = reasoning_content - else: - response_dict["outputs"]["text"] = full_text - data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") - del self.decode_status[req_id] - return response_dict - - def process_response_dict_streaming(self, response_dict, **kwargs): - """ - Preprocess the response streaming - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - enable_thinking = kwargs.get("enable_thinking") - is_end = response_dict["finished"] - req_id = response_dict["request_id"] - token_ids = response_dict["outputs"]["token_ids"] - - if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id) - if enable_thinking and self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming( - previous_texts, - previous_texts + delta_text, - delta_text, - previous_token_ids, - previous_token_ids + token_ids, - token_ids, - ) - response_dict["outputs"]["text"] = text - response_dict["outputs"]["reasoning_content"] = reasoning_content - else: - response_dict["outputs"]["text"] = delta_text - if is_end: - data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") - del self.decode_status[req_id] - return response_dict - - def messages2ids(self, request_or_messages): - """ - Convert multi-turn messages into ID sequences. 
- - Args: - request_or_messages: Either a request dict containing 'messages' field, - or a list of message dicts directly - - Returns: - List of token IDs as strings (converted from token objects) - """ - if self.tokenizer.chat_template is None: - raise ValueError("This model does not support chat_template.") - spliced_message = self.tokenizer.apply_chat_template( - request_or_messages, - tokenize=False, - split_special_tokens=False, - add_special_tokens=False, - ) - - req_id = None - if isinstance(request_or_messages, dict): - req_id = request_or_messages.get("request_id", None) - tokens = self.tokenizer.tokenize(spliced_message) - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") - return token_ids - - def ids2tokens(self, token_id, task_id): - """ - token ids to strings - - Args: - token_ids (List[int]): token ids - task_id (str): task id - - Returns: - List[str]: strings - """ - - if task_id not in self.decode_status: - # prefix offset & read offset & history token ids & history token strings - self.decode_status[task_id] = [0, 0, [], ""] - - prefix_offset = self.decode_status[task_id][0] - read_offset = self.decode_status[task_id][1] - previous_token_ids = self.decode_status[task_id][2] - previous_texts = self.decode_status[task_id][3] - decode_str, prefix_offset, read_offset = self.tokenizer.decode_token( - previous_token_ids + token_id, prefix_offset, read_offset - ) - self.decode_status[task_id][0] = prefix_offset - self.decode_status[task_id][1] = read_offset - self.decode_status[task_id][2] += token_id - self.decode_status[task_id][3] += decode_str - - return decode_str, previous_token_ids, previous_texts - - def _load_tokenizer(self): - """ - load tokenizer - - Returns: - tokenizer (AutoTokenizer) - """ - vocab_file_names = [ - "tokenizer.model", - "spm.model", - "ernie_token_100k.model", - ] - for i in range(len(vocab_file_names)): - if os.path.exists(os.path.join(self.model_name_or_path, vocab_file_names[i])): - ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] - break - self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) - - def get_pad_id(self): - """ - get pad_token_id, if not pad_token_id, use eos_token - - Returns: - int: pad_token_id - """ - # if isinstance(self.tokenizer, (LlamaTokenizer, Llama3Tokenizer)) and not self.tokenizer.pad_token_id: - # return self.tokenizer.eos_token - return self.tokenizer.pad_token_id - - def pad_batch_data( - self, - insts, - pad_id=0, - return_seq_len=False, - return_array=True, - pad_style="right", - ): - """Pad the instances to the max sequence length in batch.""" - if len(insts) == 0: - padded_insts = np.array([[]], dtype=np.int64) if return_array else [[]] - if return_seq_len: - seq_len = np.array([], dtype=np.int64) if return_array else [] - return padded_insts, seq_len - return padded_insts - - max_len = max(map(len, insts)) - if pad_style == "left": - padded_insts = [[pad_id] * (max_len - len(inst)) + list(inst) for inst in insts] - else: - padded_insts = [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts] - if return_array: - padded_insts = np.array(padded_insts, dtype=np.int64).reshape([-1, max_len]) - - if return_seq_len: - seq_len = [len(inst) for inst in insts] - if return_array: - seq_len = np.array(seq_len, dtype=np.int64).reshape(-1, 1) - return padded_insts, seq_len - return padded_insts - - def update_stop_seq(self, stop_sequences): - """ - Update stop sequences 
from request. - """ - stop_seqs = [] - if isinstance(stop_sequences, str): - stop_sequences = [stop_sequences] - for seq in stop_sequences: - if seq != self.tokenizer.eos_token_id: - stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) - stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) - data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") - return stop_seqs, stop_seqs_len - - def process_logprob_response(self, token_ids, **kwargs): - full_text = self.tokenizer.decode(token_ids, **kwargs) - return full_text diff --git a/fastdeploy/input2/ernie_tokenizer.py b/fastdeploy/input2/ernie_tokenizer.py deleted file mode 100644 index 2bbc798c5c..0000000000 --- a/fastdeploy/input2/ernie_tokenizer.py +++ /dev/null @@ -1,394 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -# cipher_token=WjI1fQOvhN # do not edit this line - -import os -import re -from shutil import copyfile -from typing import Dict, List, Optional, Tuple - -import numpy as np -import paddle -import sentencepiece as spm -from paddleformers.transformers import PretrainedTokenizer -from paddleformers.transformers.tokenizer_utils_base import PaddingStrategy, TextInput -from paddleformers.utils.log import logger - - -class ErnieBotTokenizer(PretrainedTokenizer): - """ - 一个更好用的 `ErnieBotToknizer`, - 能 encode 目前 sft/ppo 阶段的特殊token,也支持多模态。 - """ - - resource_files_names = { - "vocab_file": "tokenizer.model", - } - pretrained_resource_files_map = {"vocab_file": {"ernie-bot-10b": None}} - pretrained_init_configuration = { - "ernie-bot-10b": {}, - } - model_input_names = [ - "input_ids", - "position_ids", - "attention_mask", - "labels", - ] - padding_side = "right" - - def __init__( - self, - vocab_file, - bos_token="", - cls_token="", - eos_token="", - mask_token="", - pad_token="", - sep_token="", - unk_token="", - additional_special_tokens=None, - verbose=False, - **kwargs, - ): - """doc""" - if additional_special_tokens is None: - additional_special_tokens = ["", ""] - super().__init__( - bos_token=bos_token, - cls_token=cls_token, - eos_token=eos_token, - mask_token=mask_token, - pad_token=pad_token, - sep_token=sep_token, - unk_token=unk_token, - additional_special_tokens=additional_special_tokens, - verbose=False, - **kwargs, - ) - self.vocab_file = vocab_file - self.sp_model = spm.SentencePieceProcessor() - self.sp_model.Load(vocab_file) - # pre-process map-type all spec token for decode accelerate. 
- - @property - def space_token(self): - """doc""" - return "" - - @property - def space_token_id(self): - """doc""" - return self.sp_model.piece_to_id("") - - @property - def gend_token(self): - """doc""" - return "" - - @property - def gend_token_id(self): - """doc""" - return self.sp_model.piece_to_id("") - - @property - def im_start_id(self): - """doc""" - return self.sp_model.piece_to_id("<|im_start|>") - - @property - def im_end_id(self): - """doc""" - return self.sp_model.piece_to_id("<|im_end|>") - - @property - def vocab_size(self): - """doc""" - return self.sp_model.vocab_size() - - def get_vocab(self): - """doc""" - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} - vocab.update(self.added_tokens_encoder) - return vocab - - def _tokenize(self, text): - """doc""" - return self.sp_model.encode_as_pieces(text) - - def _convert_token_to_id(self, token): - """doc""" - return self.sp_model.piece_to_id(token) - - def _convert_id_to_token(self, id): - """doc""" - return self.sp_model.id_to_piece(id) - - def spec_init(self): - if not hasattr(self, "all_spec_tok"): - self.all_spec_tok = set(self.all_special_tokens) - - def convert_tokens_to_string(self, tokens): - """Converts a sequence of tokens (string) in a single string.""" - self.spec_init() - current_sub_tokens = [] - out_string = "" - # prev_is_special = False - for token in tokens: - # make sure that special tokens are not decoded using sentencepiece model - if token in self.all_spec_tok: - # if not prev_is_special: - # out_string += " " - out_string += self.sp_model.decode(current_sub_tokens) + token - # prev_is_special = True - - current_sub_tokens = [] - else: - current_sub_tokens.append(token) - # prev_is_special = False - out_string += self.sp_model.decode(current_sub_tokens) - return out_string # .strip() - - def prepare_for_model(self, *args, **kwargs): - """doc""" - if "add_special_tokens" in kwargs: - kwargs.pop("add_special_tokens") - # logger.warning(f'ErnieBotTokenizer v2 does not support `add_special_tokens`') - return super().prepare_for_model(*args, **kwargs) - - def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]: - """ - Save the vocabulary and special tokens file to a directory. - Args: - save_directory (`str`): - The directory in which to save the vocabulary. - Returns: - `Tuple(str)`: Paths to the files saved. - """ - if not os.path.isdir(save_directory): - logger.error(f"Vocabulary path ({save_directory}) should be a directory") - return - out_vocab_file = os.path.join( - save_directory, - (filename_prefix + "-" if filename_prefix else "") + self.resource_files_names["vocab_file"], - ) - if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file): - copyfile(self.vocab_file, out_vocab_file) - elif not os.path.isfile(self.vocab_file): - with open(out_vocab_file, "wb") as fi: - content_spiece_model = self.sp_model.serialized_model_proto() - fi.write(content_spiece_model) - return (out_vocab_file,) - - def tokenize(self, text: TextInput, **kwargs) -> List[str]: - """ - Converts a string in a sequence of tokens, using the tokenizer. - - Split in words for word-based vocabulary or sub-words for sub-word-based vocabularies - (BPE/SentencePieces/WordPieces). Takes care of added tokens. - - Args: - text (`str`): - The sequence to be encoded. - **kwargs (additional keyword arguments): - Passed along to the model-specific `prepare_for_tokenization` preprocessing method. 
- - Returns: - `List[str]`: The list of tokens. - """ - # Simple mapping string => AddedToken for special tokens with specific tokenization behaviors - # all_special_tokens_extended = dict( - # (str(t), t) - # for t in self.all_special_tokens_extended - # if isinstance(t, AddedToken) - # ) - - self.spec_init() - text, kwargs = self.prepare_for_tokenization(text, **kwargs) - - # TODO: should this be in the base class? - if hasattr(self, "do_lower_case") and self.do_lower_case: - # convert non-special tokens to lowercase - escaped_special_toks = [re.escape(s_tok) for s_tok in (self.unique_no_split_tokens + self.all_spec_tok)] - pattern = r"(" + r"|".join(escaped_special_toks) + r")|" + r"(.+?)" - text = re.sub(pattern, lambda m: m.groups()[0] or m.groups()[1].lower(), text) - - no_split_token = set(self.unique_no_split_tokens) - tokens = self.tokens_trie.split(text) - - # ["This is something", "", " else"] - # for i, token in enumerate(tokens): - # if token in no_split_token: - # tok_extended = all_special_tokens_extended.get(token, None) - # print(f'>>>{token}|{tok_extended}|{all_special_tokens_extended}<<<') - # left = tokens[i - 1] if i > 0 else None - # right = tokens[i + 1] if i < len(tokens) - 1 else None - # if isinstance(tok_extended, AddedToken): - # if tok_extended.rstrip and right: - # # A bit counter-intuitive but we strip the left of the string - # # since tok_extended.rstrip means the special token is eating all white spaces on its right - # tokens[i + 1] = right.lstrip() - # # Strip white spaces on the left - # if tok_extended.lstrip and left: - # tokens[i - 1] = left.rstrip() # Opposite here - # else: - # We strip left and right by default - # if right: - # tokens[i + 1] = right.lstrip() - # if left: - # tokens[i - 1] = left.rstrip() - # ["This is something", "", "else"] - tokenized_text = [] - for token in tokens: - # Need to skip eventual empty (fully stripped) tokens - if not token: - continue - if token in no_split_token: - tokenized_text.append(token) - else: - tokenized_text.extend(self._tokenize(token)) - # ["This", " is", " something", "", "else"] - return tokenized_text - - def _decode(self, *args, **kwargs): - """doc""" - kwargs.pop("clean_up_tokenization_spaces", None) - kwargs.pop("spaces_between_special_tokens", None) - return super()._decode( - *args, - **kwargs, - clean_up_tokenization_spaces=False, - spaces_between_special_tokens=False, - ) - - def _pad( - self, - encoded_inputs: Dict, - max_length: Optional[int] = None, - padding_strategy=PaddingStrategy.DO_NOT_PAD, - pad_to_multiple_of: Optional[int] = None, - return_attention_mask: Optional[bool] = None, - ) -> dict: - """doc""" - if return_attention_mask is None: - return_attention_mask = "attention_mask" in self.model_input_names - if return_attention_mask: - required_input = encoded_inputs[self.model_input_names[0]] - if padding_strategy == PaddingStrategy.LONGEST: - max_length = len(required_input) - if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0): - max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of - needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length - if "attention_mask" in encoded_inputs and encoded_inputs["attention_mask"] is not None: - attention_mask = encoded_inputs.pop("attention_mask") - if isinstance(attention_mask, paddle.Tensor): - attention_mask = attention_mask.numpy() - elif isinstance(attention_mask, list): - attention_mask = np.array(attention_mask) - 
elif not isinstance(attention_mask, np.ndarray): - raise ValueError(f"Unexpected type {type(attention_mask)} of attention_mask, ") - else: - attention_mask = np.tril( - np.ones( - (len(required_input), len(required_input)), - dtype=np.int64, - ) - ) - attention_mask = np.expand_dims(attention_mask, axis=0) - if needs_to_be_padded: - difference = max_length - len(required_input) - if self.padding_side == "right": - if attention_mask.ndim == 1: - pad_width = [(0, difference)] - else: - pad_width = [(0, 0), (0, difference), (0, difference)] - elif self.padding_side == "left": - if attention_mask.ndim == 1: - pad_width = [(difference, 0)] - else: - pad_width = [(0, 0), (difference, 0), (difference, 0)] - else: - raise ValueError("Invalid padding strategy:" + str(self.padding_side)) - attention_mask = np.pad( - attention_mask, - pad_width=pad_width, - mode="constant", - constant_values=0, - ) - encoded_inputs = super()._pad( - encoded_inputs, - max_length, - padding_strategy=padding_strategy, - pad_to_multiple_of=pad_to_multiple_of, - return_attention_mask=False, - ) - if return_attention_mask: - encoded_inputs["attention_mask"] = attention_mask.tolist() - return encoded_inputs - - -def add_special_tokens( - tokenizer, - special_tokens_info, - use_ocr_specialtoken=False, - use_crop_specialtoken=False, - special_token_ids_start=254208, - special_token_ids_end=256256, -): - """ - 增加 special token - - placeholder [<|IMAGE_PLACEHOLDER|>, <|AUDIO_PLACEHOLDER|>, <|VIDEO_PLACEHOLDER|>] 共3个 - - 模态起始截止 special tokens [<|BOI|> <|EOI|> <|BOA|> <|EOA|> <|BOV|> <|EOV|>] - - ocr special tokens [<|LOC_0|> <|LOC_1|> ... <|LOC_1000|>] 共1001个 - - crop special tokens [<|CROP_COL_SEP|>, <|CROP_ROW_SEP|>, <|CROP_IMAGE_SEP|>] 共3个 - <|CROP_COL_SEP|> for col 维度切 图片width(替换原明文逗号) - <|CROP_ROW_SEP|> for row 维度切 图片height(替换原明文回车) - <|CROP_IMAGE_SEP|> for 区分原图和crop图 图片width(替换原明文两个回车) - - 共2048个 unsed token - - Args: - tokenizer (ErnieTokenizer): tokenizer - special_token_ids_start (int, optional): special token 起点 ids. Defaults to 254208. - special_token_ids_end (int, optional): 词表最多支持大小. Defaults to 256256. - """ - special_tokens = [ - special_tokens_info["image_placeholder"], - special_tokens_info["audio_placeholder"], - ] - - if use_ocr_specialtoken: - special_tokens.extend(special_tokens_info["ocr_coor"]) - special_tokens.extend(special_tokens_info["ocr_begin_end"]) - - if use_crop_specialtoken: - special_tokens.extend(special_tokens_info["crop"]) - - # add special_tokens - additional_special_tokens = {"additional_special_tokens": special_tokens} - tokenizer.add_special_tokens(additional_special_tokens) - - # check - first_special_tokens = tokenizer.encode(special_tokens[0])["input_ids"] - - assert first_special_tokens[0] == special_token_ids_start, f"[ERROR] first_special_tokens={first_special_tokens}" - assert ( - len(tokenizer.get_vocab()) < special_token_ids_end - ), f"[ERROR] vocab_size = {len(tokenizer.get_vocab())} >= {special_token_ids_end} 增加过多special token了!" diff --git a/fastdeploy/input2/mm_processor/process.py b/fastdeploy/input2/mm_processor/process.py deleted file mode 100644 index ea2559a0fe..0000000000 --- a/fastdeploy/input2/mm_processor/process.py +++ /dev/null @@ -1,512 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -""" process.py """ -import copy -import os -from collections import defaultdict -from typing import Any, Dict, List, Union - -import numpy as np -from paddleformers.transformers.image_utils import ChannelDimension -from PIL import Image - -from fastdeploy.entrypoints.chat_utils import parse_chat_messages -from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer -from fastdeploy.utils import data_processor_logger - -from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor -from .process_video import read_frames_decord, read_video_decord -from .utils.render_timestamp import render_frame_timestamp - -IDS_TYPE_FLAG = {"text": 0, "image": 1, "video": 2, "audio": 3} - - -def fancy_print(input_ids, tokenizer, image_patch_id=None): - """ - input_ids: input_ids - tokenizer: the tokenizer of models - """ - i = 0 - res = "" - text_ids = [] - real_image_token_len = 0 - while i < len(input_ids): - if input_ids[i] == image_patch_id: - if len(text_ids) > 0: - res += tokenizer.decode(text_ids) - text_ids = [] - - real_image_token_len += 1 - else: - if real_image_token_len != 0: - res += f"<|IMAGE@{real_image_token_len}|>" - real_image_token_len = 0 - - text_ids.append(input_ids[i]) - - i += 1 - if len(text_ids) > 0: - - res += tokenizer.decode(text_ids) - text_ids = [] - return res - - -class DataProcessor: - """ - Processes multimodal chat messages into model-ready inputs, - handling text, images, and videos with 3D positional embeddings. 
- """ - - CLS_TOKEN = "<|begin_of_sentence|>" - SEP_TOKEN = "<|end_of_sentence|>" - EOS_TOKEN = "" - IMG_START = "<|IMAGE_START|>" - IMG_END = "<|IMAGE_END|>" - VID_START = "<|VIDEO_START|>" - VID_END = "<|VIDEO_END|>" - - def __init__( - self, - tokenizer_name: str, - image_preprocessor_name: str, - spatial_conv_size: int = 2, - temporal_conv_size: int = 2, - image_min_pixels: int = 4 * 28 * 28, - image_max_pixels: int = 6177 * 28 * 28, - video_min_pixels: int = 299 * 28 * 28, - video_max_pixels: int = 1196 * 28 * 28, - video_target_frames: int = -1, - video_frames_sample: str = "leading", - video_max_frames: int = 180, - video_min_frames: int = 16, - video_fps: int = 2, - **kwargs, - ) -> None: - # Tokenizer and image preprocessor - self.model_name_or_path = tokenizer_name - self._load_tokenizer() - self.tokenizer.ignored_index = -100 - self.image_preprocessor = AdaptiveImageProcessor.from_pretrained(image_preprocessor_name) - - # Convolution sizes for patch aggregation - self.spatial_conv_size = spatial_conv_size - self.temporal_conv_size = temporal_conv_size - - # Pixel constraints - self.image_min_pixels = image_min_pixels - self.image_max_pixels = image_max_pixels - self.video_min_pixels = video_min_pixels - self.video_max_pixels = video_max_pixels - - # Video sampling parameters - self.target_frames = video_target_frames - self.frames_sample = video_frames_sample - self.max_frames = video_max_frames - self.min_frames = video_min_frames - self.fps = video_fps - - # Special tokens and IDs - self.cls_token = self.CLS_TOKEN - self.sep_token = self.SEP_TOKEN - self.eos_token = self.EOS_TOKEN - self.image_start = self.IMG_START - self.image_end = self.IMG_END - self.video_start = self.VID_START - self.video_end = self.VID_END - self.image_patch_id = self.tokenizer.convert_tokens_to_ids("<|IMAGE_PLACEHOLDER|>") - self.image_start_id = self.tokenizer.convert_tokens_to_ids(self.image_start) - self.video_start_id = self.tokenizer.convert_tokens_to_ids(self.video_start) - self.sep_token_id = self.tokenizer.convert_tokens_to_ids(self.sep_token) - self.eos_token_id = self.tokenizer.convert_tokens_to_ids(self.eos_token) - - self.token_type_mapping = self._build_token_type_mapping() - self.is_training = True - self.role_prefixes = { - "system": "", - "user": "User: ", - "bot": "Assistant: ", - "assistant": "Assistant: ", - } - - def _build_token_type_mapping(self) -> Dict[Any, int]: - mapping = defaultdict(lambda: IDS_TYPE_FLAG["text"]) - for token in ( - self.IMG_START, - self.IMG_END, - self.VID_START, - self.VID_END, - ): - mapping[token] = IDS_TYPE_FLAG["image"] - mapping[self.image_patch_id] = IDS_TYPE_FLAG["image"] - return mapping - - def train(self) -> None: - """Enable training mode (produces labels).""" - self.is_training = True - - def eval(self) -> None: - """Enable evaluation mode (doesn't produce labels).""" - self.is_training = False - - def text2ids(self, text, images=None, videos=None): - """ - Convert chat text into model inputs. - Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. 
- """ - - outputs = { - "input_ids": [], - "token_type_ids": [], - "position_ids": [], - "images": [], - "grid_thw": [], - "image_type_ids": [], - "labels": [], - "cur_position": 0, - "pic_cnt": 0, - "video_cnt": 0, - } - - IMAGE_PLACEHOLDER = "<|image@placeholder|>" - VIDEO_PLACEHOLDER = "<|video@placeholder|>" - IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER) - VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER) - st, image_idx, video_idx = 0, 0, 0 - while st < len(text): - image_pos = text.find(IMAGE_PLACEHOLDER, st) - image_pos = len(text) if image_pos == -1 else image_pos - video_pos = text.find(VIDEO_PLACEHOLDER, st) - video_pos = len(text) if video_pos == -1 else video_pos - ed = min(image_pos, video_pos) - - self._add_text(text[st:ed], outputs) - if ed == len(text): - break - - if ed == image_pos: - self._add_image(images[image_idx], outputs) - image_idx += 1 - st = ed + IMAGE_PLACEHOLDER_LEN - else: - item = videos[video_idx] - if isinstance(item, dict): - frames = self._load_and_process_video(item["video"], item) - else: - frames = self._load_and_process_video(item, {}) - - self._add_video(frames, outputs) - video_idx += 1 - st = ed + VIDEO_PLACEHOLDER_LEN - - return outputs - - def request2ids( - self, request: Dict[str, Any], tgts: List[str] = None - ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]: - """ - Convert chat messages into model inputs. - Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. - """ - - outputs = { - "input_ids": [], - "token_type_ids": [], - "position_ids": [], - "images": [], - "grid_thw": [], - "image_type_ids": [], - "labels": [], - "cur_position": 0, - "pic_cnt": 0, - "video_cnt": 0, - } - - messages = parse_chat_messages(request.get("messages")) - image_message_list = [] - for msg in messages: - role = msg.get("role") - assert role in self.role_prefixes, f"Unsupported role: {role}" - content_items = msg.get("content") - if not isinstance(content_items, list): - content_items = [content_items] - for item in content_items: - if isinstance(item, dict) and item.get("type") in [ - "image", - "video", - ]: - image_message_list.append(item) - - prompt_token_ids = self.apply_chat_template(request) - if len(prompt_token_ids) == 0: - raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - image_start_index = 0 - image_message_index = 0 - for i in range(len(prompt_token_ids)): - if prompt_token_ids[i] in [ - self.image_start_id, - self.video_start_id, - ]: - self._add_text(prompt_token_ids[image_start_index : i + 1], outputs) - image_start_index = i + 1 - image_message = image_message_list[image_message_index] - if image_message["type"] == "image": - img = image_message.get("image") - if img is None: - continue - outputs["pic_cnt"] += 1 - self._add_image(img, outputs) - elif image_message["type"] == "video": - video_bytes = image_message.get("video") - if video_bytes is None: - continue - frames = self._load_and_process_video(video_bytes, image_message) - outputs["video_cnt"] += 1 - self._add_video(frames, outputs) - image_message_index += 1 - self._add_text(prompt_token_ids[image_start_index:], outputs) - - if self.is_training: - assert tgts, "training must give tgt !" 
- self._extract_labels(outputs, tgts) - return outputs - - def _add_special_token(self, token: Union[str, int], outputs: Dict) -> None: - token_id = token if isinstance(token, int) else self.tokenizer.convert_tokens_to_ids(token) - outputs["input_ids"].append(token_id) - outputs["token_type_ids"].append(self.token_type_mapping[token]) - pos = outputs["cur_position"] - outputs["position_ids"].append([pos] * 3) - outputs["cur_position"] += 1 - - def _add_text(self, tokens, outputs: Dict) -> None: - if isinstance(tokens, str): - tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] - outputs["input_ids"].extend(tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) - - start = outputs["cur_position"] - for i in range(len(tokens)): - outputs["position_ids"].append([start + i] * 3) - outputs["cur_position"] += len(tokens) - - def _add_image(self, img, outputs: Dict) -> None: - patches_h, patches_w = self.image_preprocessor.get_smarted_resize( - img.height, - img.width, - min_pixels=self.image_min_pixels, - max_pixels=self.image_max_pixels, - )[1] - num_tokens = (patches_h * patches_w) // (self.spatial_conv_size**2) - - outputs["input_ids"].extend([self.image_patch_id] * num_tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]] * num_tokens) - - pos_ids = self._compute_3d_positions(1, patches_h, patches_w, outputs["cur_position"]) - outputs["position_ids"].extend(pos_ids) - outputs["cur_position"] = np.max(pos_ids) + 1 - - # Preprocess pixels - ret = self.image_preprocessor.preprocess( - images=[img.convert("RGB")], - do_normalize=False, - do_rescale=False, - predetermined_grid_thw=np.array([[patches_h, patches_w]]), - do_convert_rgb=True, - input_data_format=ChannelDimension.LAST, - ) - outputs["images"].append(ret["pixel_values"]) - outputs["grid_thw"].append(ret["image_grid_thw"]) - outputs["image_type_ids"].append(0) - - def _add_video(self, frames, outputs: Dict) -> None: - patches_h, patches_w = self.image_preprocessor.get_smarted_resize( - frames[0].height, - frames[0].width, - min_pixels=self.video_min_pixels, - max_pixels=self.video_max_pixels, - )[1] - num_frames = len(frames) - num_tokens = (num_frames * patches_h * patches_w) // (self.spatial_conv_size**2 * self.temporal_conv_size) - - pixel_stack = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) - ret = self.image_preprocessor.preprocess( - images=None, - videos=pixel_stack, - do_normalize=False, - do_rescale=False, - predetermined_grid_thw=np.array([[patches_h, patches_w]] * num_frames), - do_convert_rgb=True, - input_data_format=ChannelDimension.LAST, - ) - outputs["images"].append(ret["pixel_values_videos"]) - outputs["grid_thw"].append(ret["video_grid_thw"]) - outputs["image_type_ids"].extend([1] * num_frames) - - outputs["input_ids"].extend([self.image_patch_id] * num_tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["video"]] * num_tokens) - - pos_ids = self._compute_3d_positions(num_frames, patches_h, patches_w, outputs["cur_position"]) - outputs["position_ids"].extend(pos_ids) - outputs["cur_position"] = np.max(pos_ids) + 1 - - def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None: - input_ids = copy.deepcopy(outputs["input_ids"]) - labels = [self.tokenizer.ignored_index] * len(input_ids) - - tgt_count = input_ids.count(self.sep_token_id) - assert tgt_count == len(tgts), f"len(tgts) != len(src) {len(tgts)} vs {tgt_count}" - - tgt_index = 0 - for i, token_id in enumerate(input_ids): - if token_id == self.sep_token_id: - 
labels_token = self.tokenizer.tokenize(tgts[tgt_index]) - labels_token_id = self.tokenizer.convert_tokens_to_ids(labels_token) - labels[i - len(labels_token_id) : i] = labels_token_id - labels[i] = self.eos_token_id # - tgt_index += 1 - - outputs["labels"] = labels - - def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]: - reader, meta, path = read_video_decord(url, save_to_disk=False) - - video_frame_args = dict() - video_frame_args["fps"] = item.get("fps", self.fps) - video_frame_args["min_frames"] = item.get("min_frames", self.min_frames) - video_frame_args["max_frames"] = item.get("max_frames", self.max_frames) - video_frame_args["target_frames"] = item.get("target_frames", self.target_frames) - video_frame_args["frames_sample"] = item.get("frames_sample", self.frames_sample) - - video_frame_args = self._set_video_frame_args(video_frame_args, meta) - - frames_data, _, timestamps = read_frames_decord( - path, - reader, - meta, - target_frames=video_frame_args["target_frames"], - target_fps=video_frame_args["fps"], - frames_sample=video_frame_args["frames_sample"], - save_to_disk=False, - ) - - frames: List[Image.Image] = [] - for img_array, ts in zip(frames_data, timestamps): - frames.append(render_frame_timestamp(img_array, ts)) - # Ensure even number of frames for temporal conv - if len(frames) % 2 != 0: - frames.append(copy.deepcopy(frames[-1])) - return frames - - def _set_video_frame_args(self, video_frame_args, video_meta): - """ - 根据已知参数和优先级,设定最终的抽帧参数 - """ - # 优先级:video_target_frames > (video_min_frames, video_max_frames) > video_fps - if video_frame_args["target_frames"] > 0: - if video_frame_args["fps"] >= 0: - raise ValueError("fps must be negative if target_frames is given") - if ( - video_frame_args["min_frames"] > 0 - and video_frame_args["target_frames"] < video_frame_args["min_frames"] - ): - raise ValueError("target_frames must be larger than min_frames") - if ( - video_frame_args["max_frames"] > 0 - and video_frame_args["target_frames"] > video_frame_args["max_frames"] - ): - raise ValueError("target_frames must be smaller than max_frames") - else: - if video_frame_args["fps"] < 0: - raise ValueError("Must provide either positive target_fps or positive target_frames.") - # 先计算在video_fps下抽到的帧数 - frames_to_extract = int(video_meta["duration"] * video_frame_args["fps"]) - # 判断是否在目标区间内,如果不是,则取target_frames为上界或下界 - if ( - video_frame_args["min_frames"] > 0 - and video_frame_args["max_frames"] > 0 - and video_frame_args["min_frames"] > video_frame_args["max_frames"] - ): - raise ValueError("min_frames must be smaller than max_frames") - if video_frame_args["min_frames"] > 0 and frames_to_extract < video_frame_args["min_frames"]: - video_frame_args["target_frames"] = video_frame_args["min_frames"] - video_frame_args["fps"] = -1 - if video_frame_args["max_frames"] > 0 and frames_to_extract > video_frame_args["max_frames"]: - video_frame_args["target_frames"] = video_frame_args["max_frames"] - video_frame_args["fps"] = -1 - - return video_frame_args - - def _compute_3d_positions(self, t: int, h: int, w: int, start_idx: int) -> List[List[int]]: - # Downsample time if needed - t_eff = t // self.temporal_conv_size if t != 1 else 1 - gh, gw = h // self.spatial_conv_size, w // self.spatial_conv_size - time_idx = np.repeat(np.arange(t_eff), gh * gw) - h_idx = np.tile(np.repeat(np.arange(gh), gw), t_eff) - w_idx = np.tile(np.arange(gw), t_eff * gh) - - coords = list(zip(time_idx, h_idx, w_idx)) - return [[start_idx + ti, start_idx + hi, start_idx + wi] for ti, 
hi, wi in coords] - - def _load_tokenizer(self): - """ - load tokenizer - - Returns: - tokenizer (AutoTokenizer) - """ - vocab_file_names = [ - "tokenizer.model", - "spm.model", - "ernie_token_100k.model", - ] - for i in range(len(vocab_file_names)): - if os.path.exists(os.path.join(self.model_name_or_path, vocab_file_names[i])): - ErnieBotTokenizer.resource_files_names["vocab_file"] = vocab_file_names[i] - break - self.tokenizer = ErnieBotTokenizer.from_pretrained(self.model_name_or_path) - - def apply_chat_template(self, request): - """ - Convert multi-turn messages into ID sequences. - - Args: - messages: Either a request dict containing 'messages' field, - or a list of message dicts directly - - Returns: - List of token IDs as strings (converted from token objects) - """ - if self.tokenizer.chat_template is None: - raise ValueError("This model does not support chat_template.") - - prompt_token_str = ( - self.tokenizer.apply_chat_template( - request, - tokenize=False, - add_generation_prompt=request.get("add_generation_prompt", True), - ) - .replace("<|image@placeholder|>", "") - .replace("<|video@placeholder|>", "") - ) - tokens = self.tokenizer.tokenize(prompt_token_str) - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - data_processor_logger.info( - f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" - ) - return token_ids diff --git a/fastdeploy/input2/mm_processor/process_video.py b/fastdeploy/input2/mm_processor/process_video.py deleted file mode 100644 index 91120096c7..0000000000 --- a/fastdeploy/input2/mm_processor/process_video.py +++ /dev/null @@ -1,205 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import io -import os -import random - -import numpy as np -from PIL import Image - -from fastdeploy.utils import data_processor_logger - -from .utils.io_utils import EXTRACTED_FRAME_DIR, get_filename -from .utils.video_utils import VideoReaderWrapper - - -def read_video_decord(video_path, save_to_disk): - """get reader and meta by decord""" - # video_path = get_downloadable(video_path, save_to_disk=save_to_disk) - if isinstance(video_path, VideoReaderWrapper): - video_reader = video_path - else: - if isinstance(video_path, bytes): - video_path = io.BytesIO(video_path) - video_reader = VideoReaderWrapper(video_path, num_threads=1) - vlen = len(video_reader) - fps = video_reader.get_avg_fps() - duration = vlen / float(fps) - - video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} - - return video_reader, video_meta, video_path - - -def get_frame_indices( - vlen, - target_frames=-1, - target_fps=-1, - frames_sample="middle", - fix_start=None, - input_fps=-1, -): - """ - 取出对应的frame index - """ - assert frames_sample in ["rand", "middle", "leading"] - if target_frames > 0: - assert target_fps <= 0, "target_fps must be negative if target_frames is given." 
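        # Editor's sketch (illustration only, not part of this patch): the sampling
        # logic below splits the video into `target_frames` equal intervals and picks
        # one index per interval. For example, with vlen=100 and target_frames=4:
        #   intervals = np.linspace(0, 100, num=5).astype(int) -> [0, 25, 50, 75, 100]
        #   ranges = [(0, 24), (25, 49), (50, 74), (75, 99)]
        #   frames_sample="middle" -> [12, 37, 62, 87]; "leading" -> [0, 25, 50, 75]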
- if target_frames > vlen: - acc_samples = vlen - data_processor_logger.info( - f"target_frames={target_frames} is larger than video length {vlen}, " - f"will sample {acc_samples} frames." - ) - else: - acc_samples = target_frames - data_processor_logger.debug(f"sampling at target_frames={target_frames}, frames_sample={frames_sample}") - - # split the video into `acc_samples` intervals, and sample from each interval. - intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int) - ranges = [] - for idx, interv in enumerate(intervals[:-1]): - ranges.append((interv, intervals[idx + 1] - 1)) - if frames_sample == "rand": - try: - frame_indices = [random.choice(range(x[0], x[1])) for x in ranges] - except Exception: - frame_indices = np.random.permutation(vlen)[:acc_samples] - frame_indices.sort() - frame_indices = list(frame_indices) - elif fix_start is not None: - frame_indices = [x[0] + fix_start for x in ranges] - elif frames_sample == "leading": - frame_indices = [x[0] for x in ranges] - elif frames_sample == "middle": - frame_indices = [(x[0] + x[1]) // 2 for x in ranges] - else: - raise NotImplementedError - - elif target_fps > 0: - assert target_frames <= 0, "target_frames must be negative if target_fps is given." - assert input_fps > 0, "input_fps must be provided if target_fps is given." - data_processor_logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}") - duration = float(vlen) / input_fps - delta = 1 / target_fps # gap between frames, this is also the clip length each frame represents - if frames_sample == "middle": - frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) - elif frames_sample == "leading": - frame_seconds = np.arange(0, duration, delta) - if frames_sample == "rand": - frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta) - rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5 - frame_seconds += rand_offset * delta - frame_indices = np.around(frame_seconds * input_fps).astype(int) - frame_indices = [e for e in frame_indices if e < vlen] - - else: - raise ValueError("Must provide either positive target_fps or positive target_frames.") - - return frame_indices - - -def read_frames_decord( - video_path, - video_reader, - video_meta, - target_frames=-1, - target_fps=-1, - frames_sample="middle", - fix_start=None, - save_to_disk=False, - cache_dir=EXTRACTED_FRAME_DIR, - frame_indices=None, - tol=10, -): - """get frames by decord""" - - if frame_indices is None: - frame_indices = get_frame_indices( - video_meta["num_of_frame"], - target_frames=target_frames, - target_fps=target_fps, - frames_sample=frames_sample, - fix_start=fix_start, - input_fps=video_meta["fps"], - ) - - frames = [] - for frame_indice_index in range(0, len(frame_indices)): - frame_indice = frame_indices[frame_indice_index] - try: - frames.append(video_reader[frame_indice].asnumpy()) # (T, H, W, C) - except Exception as e: - data_processor_logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}") - previous_counter = 1 - later_counter = 1 - previous_after_flag = True - if frame_indice == 0 or frame_indice == len(video_reader) - 1: - cur_tol = tol * 2 - else: - cur_tol = tol - while previous_counter < cur_tol or later_counter < cur_tol: - if previous_after_flag: - if frame_indice - previous_counter < 0: - previous_counter += 1 - previous_after_flag = not previous_after_flag - continue - try: - frames.append(video_reader[frame_indice - previous_counter].asnumpy()) - data_processor_logger.info( - f"replace 
{frame_indice}-th frame with {frame_indice-previous_counter}-th frame" - ) - frame_indices[frame_indice_index] = frame_indice - previous_counter - break - except Exception as e: - previous_counter += 1 - data_processor_logger.info(f"error: {e}") - else: - if frame_indice + later_counter >= len(video_reader): - later_counter += 1 - previous_after_flag = not previous_after_flag - continue - try: - frames.append(video_reader[frame_indice + later_counter].asnumpy()) - data_processor_logger.info( - f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame" - ) - frame_indices[frame_indice_index] = frame_indice + later_counter - break - except Exception: - later_counter += 1 - previous_after_flag = not previous_after_flag - - frames = np.stack(frames, axis=0) - assert len(frames) == len(frame_indices), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}" - - ret = [] - - url_sha1 = get_filename() - for idx, frame in enumerate(frames): - tmp = Image.fromarray(frame, "RGB") - if save_to_disk: - save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png") - if not os.path.exists(os.path.dirname(save_path)): - os.makedirs(os.path.dirname(save_path)) - tmp.save(save_path) - tmp = save_path - ret.append(tmp) - - time_stamps = [frame_idx * video_meta["duration"] / video_meta["num_of_frame"] for frame_idx in frame_indices] - - return ret, frame_indices, time_stamps diff --git a/fastdeploy/input2/preprocess.py b/fastdeploy/input2/preprocess.py deleted file mode 100644 index 120be9ce88..0000000000 --- a/fastdeploy/input2/preprocess.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from typing import Any, Dict, Optional - -from fastdeploy.config import ErnieArchitectures -from fastdeploy.engine.config import ModelConfig -from fastdeploy.reasoning import ReasoningParserManager - - -class InputPreprocessor: - """ - Args: - model_name_or_path (str): - Model name or path to the pretrained model. If a model name is provided, it should be a - key in the Hugging Face Transformers' model registry (https://huggingface.co/models). - The model will be downloaded from the Hugging Face model hub if necessary. - If a path is provided, the model will be loaded from that path. - reasoning_parser (str, optional): - Reasoning parser type. Defaults to None. - Flag specifies the reasoning parser to use for extracting reasoning content from the model output - enable_mm (bool, optional): - Whether to use the multi-modal model processor. Defaults to False. - - Raises: - ValueError: - If the model name is not found in the Hugging Face Transformers' model registry and the path does not - exist. 
- """ - - def __init__( - self, - model_name_or_path: str, - reasoning_parser: str = None, - limit_mm_per_prompt: Optional[Dict[str, Any]] = None, - mm_processor_kwargs: Optional[Dict[str, Any]] = None, - enable_mm: bool = False, - ) -> None: - - self.model_name_or_path = model_name_or_path - self.reasoning_parser = reasoning_parser - self.enable_mm = enable_mm - self.limit_mm_per_prompt = limit_mm_per_prompt - self.mm_processor_kwargs = mm_processor_kwargs - - def create_processor(self): - """ - 创建数据处理器。如果启用了多模态注册表,则使用该表中的模型;否则,使用传递给构造函数的模型名称或路径。 - 返回值:DataProcessor(如果不启用多模态注册表)或MultiModalRegistry.Processor(如果启用多模态注册表)。 - - Args: - 无参数。 - - Returns: - DataProcessor or MultiModalRegistry.Processor (Union[DataProcessor, MultiModalRegistry.Processor]): 数据处理器。 - """ - reasoning_parser_obj = None - if self.reasoning_parser: - reasoning_parser_obj = ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) - architectures = ModelConfig({"model": self.model_name_or_path}).architectures[0] - if not self.enable_mm: - if not ErnieArchitectures.contains_ernie_arch(architectures): - from fastdeploy.input.text_processor import DataProcessor - - self.processor = DataProcessor( - model_name_or_path=self.model_name_or_path, - reasoning_parser_obj=reasoning_parser_obj, - ) - else: - from fastdeploy.input.ernie_processor import ErnieProcessor - - self.processor = ErnieProcessor( - model_name_or_path=self.model_name_or_path, - reasoning_parser_obj=reasoning_parser_obj, - ) - else: - if not ErnieArchitectures.contains_ernie_arch(architectures): - raise ValueError(f"Model {self.model_name_or_path} is not a valid Ernie4_5_VL model.") - else: - from fastdeploy.input.ernie_vl_processor import ErnieMoEVLProcessor - - self.processor = ErnieMoEVLProcessor( - model_name_or_path=self.model_name_or_path, - limit_mm_per_prompt=self.limit_mm_per_prompt, - mm_processor_kwargs=self.mm_processor_kwargs, - reasoning_parser_obj=reasoning_parser_obj, - ) - return self.processor diff --git a/fastdeploy/input2/text_processor.py b/fastdeploy/input2/text_processor.py deleted file mode 100644 index cbaca990c5..0000000000 --- a/fastdeploy/input2/text_processor.py +++ /dev/null @@ -1,602 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" - -from abc import ABC, abstractmethod - -import numpy as np -from paddleformers.generation import GenerationConfig -from paddleformers.transformers import Llama3Tokenizer, LlamaTokenizer - -from fastdeploy import envs -from fastdeploy.utils import data_processor_logger - -_SAMPLING_EPS = 1e-5 - - -class BaseDataProcessor(ABC): - """base class for data processor""" - - def __init__(self): - """ - Returns: - None - """ - self.tokenizer = self._load_tokenizer() - self.tokenizer.bos_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.bos_token) - self.tokenizer.cls_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.cls_token) - self.tokenizer.sep_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.sep_token) - self.tokenizer.eos_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.eos_token) - self.tokenizer.mask_token_id = self.tokenizer._convert_token_to_id(self.tokenizer.mask_token) - data_processor_logger.info( - ( - f"tokenizer information: bos_token is {self.tokenizer.bos_token}, {self.tokenizer.bos_token_id}, ", - f"cls_token is {self.tokenizer.cls_token}, {self.tokenizer.cls_token_id}, " - f"sep_token is {self.tokenizer.sep_token}, {self.tokenizer.sep_token_id}, " - f"eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id}, " - f"mask_token is {self.tokenizer.mask_token}, {self.tokenizer.mask_token_id}", - ) - ) - - def _apply_default_parameters(self, request): - """ - Apply default value for parameters in request - """ - - def set_value(req, key, value): - value = getattr(self.generation_config, key, value) - if isinstance(req, dict): - if key not in req: - req[key] = value - else: - if req.get(key) is None: - req.set(key, value) - - set_value(request, "top_p", 0.7) - set_value(request, "temperature", 1.0) - set_value(request, "repetition_penalty", 1.0) - set_value(request, "frequency_penalty", 0.0) - set_value(request, "presence_penalty", 0.0) - return request - - @abstractmethod - def process_request(self, request, **kwargs): - """ - Preprocess the request - - Args: - request (Dict): may contain text and messages fields - **kwargs: others - - Returns: - bool: Whether preprocessing is successful - str: error message - """ - raise NotImplementedError - - @abstractmethod - def process_response(self, response_dict): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - raise NotImplementedError - - def text2ids(self, text, max_model_len=None): - """ - text to token ids - - Args: - text (str): text - - Returns: - List[int]: token ids list - """ - raise NotImplementedError - - def messages2ids(self, messages): - """ - Convert multi-turn messages into ID sequences. - - Args: - messages (List[List[Dict[str, Any]]]): multi-turn messages. - - Returns: - List[int]: ID sequences - """ - raise NotImplementedError - - def ids2tokens(self, token_id, task_id=None): - """ - token ids to strings - - Args: - token_id (List[int]): token id - task_id (str): task id - - Returns: - List[str]: strings - """ - raise NotImplementedError - - @abstractmethod - def _load_tokenizer(self): - """ - load tokenizer - - Returns: - tokenizer (AutoTokenizer) - """ - raise NotImplementedError - - -class DataProcessor(BaseDataProcessor): - def __init__(self, model_name_or_path, reasoning_parser_obj=None): - """ - Initializes the DecodeStatus object. - - Args: - model_name_or_path (str): The name or path of the pre-trained model to be loaded. 
- Can also be a path to a directory containing the pre-trained model file. - - Returns: - None. - - Raises: - None. - """ - - self.model_name_or_path = model_name_or_path - - # Generation config - try: - self.generation_config = GenerationConfig.from_pretrained(self.model_name_or_path) - except Exception as e: - data_processor_logger.warning( - f"Can't find generation config: {e}, so it will not use generation_config field in the model config" - ) - self.generation_config = None - - self.decode_status = dict() - self.tokenizer = self._load_tokenizer() - data_processor_logger.info( - f"tokenizer information: bos_token is {self.tokenizer.bos_token}, {self.tokenizer.bos_token_id}, \ - eos_token is {self.tokenizer.eos_token}, {self.tokenizer.eos_token_id} " - ) - - from paddleformers.trl.llm_utils import get_eos_token_id - - self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) - self.eos_token_id_len = len(self.eos_token_ids) - self.pad_token_id = self.get_pad_id() - self.reasoning_parser = None - if reasoning_parser_obj: - self.reasoning_parser = reasoning_parser_obj(self.tokenizer) - self.tokenizer.pad_token_id = self.pad_token_id - - def process_request(self, request, max_model_len=None, **kwargs): - """ - Preprocess the request - - Args: - request (Dict): may contain text and messages fields - - Returns: - bool: Whether preprocessing is successful - str: error message - """ - request = self._apply_default_parameters(request) - if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0: - request.eos_token_ids = self.eos_token_ids - - stop_sequences = request.get("stop", []) - if stop_sequences is not None and len(stop_sequences) != 0: - stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) - request.set("stop_token_ids", stop_seqs) - request.set("stop_seqs_len", stop_seqs_len) - - if request.prompt_token_ids is None or len(request.prompt_token_ids) == 0: - if request.prompt is not None: - request.prompt_token_ids = self.text2ids(request.prompt, max_model_len) - elif request.messages is not None: - if self.tokenizer.chat_template is None: - raise ValueError("This model does not support chat_template.") - task = request.to_dict() - task["enable_thinking"] = kwargs.get("enable_thinking", True) - request.prompt_token_ids = self.messages2ids(task) - else: - raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.") - if len(request.prompt_token_ids) == 0: - raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - if request.get("max_tokens") is None: - request.set( - "max_tokens", - max(1, max_model_len - len(request.prompt_token_ids)), - ) - if request.get("temperature") < _SAMPLING_EPS: - # zero temperature is equivalent to greedy sampling - request.set("temperature", 1) - if request.get("top_p") < _SAMPLING_EPS: - request.set("top_p", _SAMPLING_EPS) - data_processor_logger.info(f"Processed request {request}") - return request - - def process_request_dict(self, request, max_model_len=None, **kwargs): - """ - Preprocess the request - - Args: - request (Dict): may contain text and messages fields - - Returns: - bool: Whether preprocessing is successful - str: error message - """ - request = self._apply_default_parameters(request) - if not request.get("eos_token_ids"): - request["eos_token_ids"] = self.eos_token_ids - - # processing stop_sequences - stop_sequences = request.get("stop", []) - if stop_sequences: - stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences) 
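            # Editor's sketch (illustration only, not part of this patch; token ids are
            # hypothetical): update_stop_seq tokenizes each stop string and right-pads
            # the id lists to a common length with -1, so stop=["</s>", "Observation:"]
            # could give stop_seqs = [[2, -1, -1], [37763, 84626, 25]] and
            # stop_seqs_len = [1, 3].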
- request["stop_token_ids"] = stop_seqs - request["stop_seqs_len"] = stop_seqs_len - - data_processor_logger.info(f"Processing request {request}") - # processing prompt_token_ids - if not request.get("prompt_token_ids"): - if "prompt" in request: - request["prompt_token_ids"] = self.text2ids(request["prompt"], max_model_len).tolist() - elif "messages" in request: - if self.tokenizer.chat_template is None: - raise ValueError("This model does not support chat_template.") - request["prompt_token_ids"] = self.messages2ids(request) - else: - raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}") - if len(request["prompt_token_ids"]) == 0: - raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") - if request.get("max_tokens") is None: - request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) - if request.get("temperature") < _SAMPLING_EPS: - # zero temperature is equivalent to greedy sampling - request["temperature"] = 1 - if request.get("top_p") < _SAMPLING_EPS: - request["top_p"] = _SAMPLING_EPS - data_processor_logger.info(f"Processed request {request}") - return request - - def process_logprob_response(self, token_ids, **kwargs): - full_text = self.tokenizer.decode(token_ids, **kwargs) - return full_text - - def process_response(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - req_id = response_dict.request_id - token_ids = response_dict.outputs.token_ids - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - full_text = self.tokenizer.decode(token_ids) - - # 模型支持思考,并且支持思考 - if self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) - response_dict.outputs.text = text - response_dict.outputs.reasoning_content = reasoning_content - else: - # 模型不支持思考,并且没单独设置enable_thinking为false - response_dict.outputs.text = full_text - data_processor_logger.info(f"req_id:{req_id}, token)ids: {token_ids}") - - return response_dict - - def process_response_dict_normal(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - enable_thinking = kwargs.get("enable_thinking") - token_ids = response_dict["outputs"]["token_ids"] - is_end = response_dict["finished"] - req_id = response_dict["request_id"] - if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id) - if is_end: - full_text = previous_texts + delta_text - if enable_thinking and self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) - response_dict["outputs"]["text"] = text - response_dict["outputs"]["reasoning_content"] = reasoning_content - else: - response_dict["outputs"]["text"] = full_text - data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") - del self.decode_status[req_id] - return response_dict - - def process_response_dict_streaming(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: 
response contain text fields - """ - enable_thinking = kwargs.get("enable_thinking") - is_end = response_dict["finished"] - req_id = response_dict["request_id"] - token_ids = response_dict["outputs"]["token_ids"] - - if is_end and len(token_ids) > 0 and not kwargs.get("include_stop_str_in_output"): - if token_ids[-1] == self.tokenizer.eos_token_id: - token_ids = token_ids[:-1] - delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id) - - if enable_thinking and self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content_streaming( - previous_texts, - previous_texts + delta_text, - delta_text, - previous_token_ids, - previous_token_ids + token_ids, - token_ids, - ) - response_dict["outputs"]["text"] = text - response_dict["outputs"]["reasoning_content"] = reasoning_content - else: - response_dict["outputs"]["text"] = delta_text - if is_end: - data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") - del self.decode_status[req_id] - return response_dict - - def process_response_dict(self, response_dict, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - enable_thinking = kwargs.pop("enable_thinking", True) - if enable_thinking is None: - enable_thinking = True - stream = kwargs.get("stream", True) - if stream: - return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) - else: - return self.process_response_dict_normal( - response_dict=response_dict, - enable_thinking=enable_thinking, - **kwargs, - ) - - def text2ids(self, text, max_model_len): - """ - text to token ids - - Args: - text (str): text - - Returns: - List[int]: token ids list - """ - if envs.FD_USE_HF_TOKENIZER: - tokens = self.tokenizer( - text, - return_tensors="np", - padding=True, - truncation=True, - ) - else: - text = [text] if isinstance(text, str) else text - - tokens = self.tokenizer( - text, - return_tensors="np", - padding=True, - truncation=True, - max_length=max_model_len, - add_special_tokens=False, - ) - - return tokens["input_ids"][0] - - def messages2ids(self, request): - """ - Convert multi-turn messages into ID sequences. - - Args: - messages (List[List[Dict[str, Any]]]): multi-turn messages. 
- - Returns: - List[int]: ID sequences - """ - - spliced_message = self.tokenizer.apply_chat_template( - request, - tokenize=False, - split_special_tokens=False, - add_special_tokens=False, - return_tensors="pd", - ) - req_id = None - tokens = self.tokenizer.tokenize(spliced_message) - if isinstance(request, dict): - req_id = request.get("request_id", None) - token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") - return token_ids - - def ids2tokens(self, token_id, task_id): - """ - token ids to strings - - Args: - token_ids (List[int]): token ids - task_id (str): task id - - Returns: - List[str]: strings - """ - if envs.FD_USE_HF_TOKENIZER: - if task_id not in self.decode_status: - # history token ids & history token strings & befer decode str - self.decode_status[task_id] = [[], [], ""] - - previous_token_ids = self.decode_status[task_id][0] - decode_str = self.tokenizer.batch_decode( - [previous_token_ids + token_id], - skip_special_tokens=True, - clean_up_tokenization_spaces=False, - ) - if isinstance(decode_str, list) and len(decode_str): - new_str = decode_str[0].replace(self.decode_status[task_id][2], "", 1) - self.decode_status[task_id][1].append(new_str) - self.decode_status[task_id][2] = decode_str[0] - else: - new_str = "" - self.decode_status[task_id][0] += token_id - return new_str - else: - if task_id not in self.decode_status: - # prefix offset & read offset & history token ids & history token strings - self.decode_status[task_id] = [0, 0, [], ""] - - prefix_offset = self.decode_status[task_id][0] - read_offset = self.decode_status[task_id][1] - previous_token_ids = self.decode_status[task_id][2] - previous_texts = self.decode_status[task_id][3] - decode_str, prefix_offset, read_offset = self.tokenizer.decode_token( - previous_token_ids + token_id, prefix_offset, read_offset - ) - self.decode_status[task_id][0] = prefix_offset - self.decode_status[task_id][1] = read_offset - self.decode_status[task_id][2] += token_id - self.decode_status[task_id][3] += decode_str - - return decode_str, previous_token_ids, previous_texts - - def _load_tokenizer(self): - """ - load tokenizer - - Returns: - tokenizer (AutoTokenizer) - """ - if envs.FD_USE_HF_TOKENIZER: - from transformers import AutoTokenizer - - return AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=False) - else: - from paddleformers.transformers import AutoTokenizer - - return AutoTokenizer.from_pretrained(self.model_name_or_path, padding_side="left", use_fast=True) - - def clear_request_status(self, task_id): - """ - clear request status - - Args: - task_id (str): task id - - Returns: - results_all (str): all token strings - """ - results_all = "" - if task_id in self.decode_status: - if envs.FD_USE_HF_TOKENIZER: - results_all = self.decode_status[task_id][2] - else: - results_all = "".join(self.decode_status[task_id][3]) - del self.decode_status[task_id] - return results_all - - def get_pad_id(self): - """ - get pad_token_id, if not pad_token_id, use eos_token - - Returns: - int: pad_token_id - """ - if isinstance(self.tokenizer, (LlamaTokenizer, Llama3Tokenizer)) and not self.tokenizer.pad_token_id: - return self.tokenizer.eos_token - return self.tokenizer.pad_token_id - - def pad_batch_data( - self, - insts, - pad_id=0, - return_seq_len=False, - return_array=True, - pad_style="right", - ): - """Pad the instances to the max sequence length in batch.""" - if len(insts) == 0: - padded_insts = np.array([[]], 
dtype=np.int64) if return_array else [[]] - if return_seq_len: - seq_len = np.array([], dtype=np.int64) if return_array else [] - return padded_insts, seq_len - return padded_insts - - max_len = max(map(len, insts)) - if pad_style == "left": - padded_insts = [[pad_id] * (max_len - len(inst)) + list(inst) for inst in insts] - else: - padded_insts = [list(inst) + [pad_id] * (max_len - len(inst)) for inst in insts] - if return_array: - padded_insts = np.array(padded_insts, dtype=np.int64).reshape([-1, max_len]) - - if return_seq_len: - seq_len = [len(inst) for inst in insts] - if return_array: - seq_len = np.array(seq_len, dtype=np.int64).reshape(-1, 1) - return padded_insts, seq_len - return padded_insts - - def update_stop_seq(self, stop_sequences): - """ - Update stop sequences from request. - """ - stop_seqs = [] - for seq in stop_sequences: - if seq != self.tokenizer.eos_token_id: - stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) - stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) - data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") - return stop_seqs, stop_seqs_len From 3ced6b580026579069e111dd6303e06288bf3e73 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Fri, 15 Aug 2025 15:21:39 +0800 Subject: [PATCH 08/16] position_ids --- fastdeploy/input/qwen_mm_processor/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py index 7909ae2105..70fd5c1d10 100644 --- a/fastdeploy/input/qwen_mm_processor/process.py +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -268,7 +268,7 @@ def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, seco tn = np.arange(t).reshape(-1, 1) tn = np.broadcast_to(tn, (t, h * w)) - tn = tn * second_per_grid_t * self.tokens_per_second + tn = tn * int(second_per_grid_t) * self.tokens_per_second t_index = tn.flatten() hn = np.arange(h).reshape(1, -1, 1) From 70b77d8d45bf3b46d10745e59c1384675bbd35f7 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Fri, 15 Aug 2025 15:23:39 +0800 Subject: [PATCH 09/16] add test for qwen25-vl --- mytest/diff.py | 50 ++++++++++++++++++++++++++++ mytest/paddle_tokenizer.py | 67 ++++++++++++++++++++++++++++++++++++++ mytest/vision.py | 30 +++++++++++++++++ 3 files changed, 147 insertions(+) create mode 100644 mytest/diff.py create mode 100644 mytest/paddle_tokenizer.py create mode 100644 mytest/vision.py diff --git a/mytest/diff.py b/mytest/diff.py new file mode 100644 index 0000000000..b0b465930c --- /dev/null +++ b/mytest/diff.py @@ -0,0 +1,50 @@ +import json +import numpy as np + + +""" +dd if=out/pixel_values_videos.json bs=1 skip=2000 count=100 + + +dd if=文件名 bs=1 skip=起始位置 count=10 2>/dev/null + - bs=1: 每次读取 1 字节(确保精确跳转)。 + - skip=N: 跳过前 N 个字节(从 N+1 字节开始读取)。 + - count=10: 读取 10 个字节。 + - 2>/dev/null: 屏蔽 dd 的警告信息。 +""" + + +def load_numpy(filename): + with open(filename, "r") as f: + return np.array(json.loads(f.read())) + + +def main(): + token_ids_1 = load_numpy("out/token_ids.json") + token_ids_2 = load_numpy("../llm/out/token_ids.json")[0] + + diff_indices = np.where(token_ids_1 != token_ids_2) + print(diff_indices) + + + # pixel_1 = load_numpy("out/pixel.json") + # pixel_2 = load_numpy("../llm/out/pixel.json") + + # diff_indices = np.where(pixel_1 != pixel_2) + # print(diff_indices) + + # video_pixel_1 = 
load_numpy("out/pixel_values_videos.json") + # video_pixel_2 = load_numpy("../llm/out/pixel_values_videos.json") + + # diff_indices = np.where(video_pixel_1 != video_pixel_2) + # print(diff_indices) + + position_ids_1 = load_numpy("out/position_ids.json") + position_ids_2 = load_numpy("../llm/out/position_ids.json") + + diff_indices = np.where(position_ids_1 != position_ids_2) + print(diff_indices) + + +if __name__=="__main__": + main() \ No newline at end of file diff --git a/mytest/paddle_tokenizer.py b/mytest/paddle_tokenizer.py new file mode 100644 index 0000000000..0814a5bed7 --- /dev/null +++ b/mytest/paddle_tokenizer.py @@ -0,0 +1,67 @@ +from fastdeploy.engine.request import Request +from fastdeploy.input.preprocess import InputPreprocessor +import json + + +def main(): + model_name = "./data/models/paddle/Qwen2.5-VL-3B-Instruct" + # model_name = "./data/models/paddle/ERNIE-4.5-0.3B-Paddle" + input_processor = InputPreprocessor( + model_name_or_path=model_name, + reasoning_parser=None, + limit_mm_per_prompt=None, + mm_processor_kwargs=None, + enable_mm=True + ) + data_processor = input_processor.create_processor() + + prompt = { + "request_id": "123", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "file:///home/liudongdong/github/FastDeploy/data/images/demo.jpeg"} + }, + { + "type": "video_url", + "video_url": {"url": "file:///home/liudongdong/github/FastDeploy/data/images/3_frame_video.mp4"} + }, + { + "type": "text", + "text": "Describe image and video." + }, + ] + } + ] + } + + request = Request.from_dict(prompt) + result = data_processor.process_request(request, 1024*100) + + print(result) + + with open("out/token_ids.json", "w") as f: + f.write(json.dumps(result.prompt_token_ids)) + + # with open("out/pixel.json", "w") as f: + # f.write(json.dumps(result.multimodal_inputs["images"].tolist())) + + # with open("out/grid_thw.json", "w") as f: + # f.write(json.dumps(result.multimodal_inputs["grid_thw"].tolist())) + + # with open("out/pixel_values_videos.json", "w") as f: + # f.write(json.dumps(result.multimodal_inputs["images"].tolist())) + + # with open("out/video_grid_thw.json", "w") as f: + # f.write(json.dumps(result.multimodal_inputs["grid_thw"].tolist())) + + with open("out/position_ids.json", "w") as f: + f.write(json.dumps(result.multimodal_inputs["position_ids"].tolist())) + + +if __name__ == "__main__": + main() + diff --git a/mytest/vision.py b/mytest/vision.py new file mode 100644 index 0000000000..87678dc65c --- /dev/null +++ b/mytest/vision.py @@ -0,0 +1,30 @@ +from fastdeploy.engine.sampling_params import SamplingParams +from fastdeploy.entrypoints.llm import LLM + +model_name_or_path = "./models/Qwen2-7B-Instruct" + +IMAGE_PLACEHOLDER = "<|image@placeholder|>" + +# 超参设置 +sampling_params = SamplingParams(temperature=0.1, max_tokens=30) +llm = LLM(model=model_name_or_path, tensor_parallel_size=1) +prompt = { + "messages": [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": "file://mytest/images/demo.jpeg" + }, + { + "type": "text", + "text": "Describe this image." 
+ }, + ] + } + ] +} +output = llm.generate(prompts="who are you?", use_tqdm=True, sampling_params=sampling_params) + +print(output) From 9e64d5c5cb5c8b07c6cd01b70eaf8c33fe6fc72e Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Fri, 15 Aug 2025 16:09:39 +0800 Subject: [PATCH 10/16] organize comments --- .../qwen_mm_processor/image_processor.py | 253 +++++++++--------- fastdeploy/input/qwen_mm_processor/process.py | 216 +++++++++++---- .../input/qwen_mm_processor/process_video.py | 109 +++++++- fastdeploy/input/qwen_vl_processor.py | 181 ++++++++++--- 4 files changed, 545 insertions(+), 214 deletions(-) diff --git a/fastdeploy/input/qwen_mm_processor/image_processor.py b/fastdeploy/input/qwen_mm_processor/image_processor.py index e7397bcaa2..3fd5403912 100644 --- a/fastdeploy/input/qwen_mm_processor/image_processor.py +++ b/fastdeploy/input/qwen_mm_processor/image_processor.py @@ -1,5 +1,5 @@ """ -# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ # limitations under the License. """ -"""image preprocessor adaptive""" import math from typing import List, Optional, Union @@ -66,17 +65,44 @@ def round_by_factor(number: int, factor: int) -> int: - """Returns the closest integer to 'number' that is divisible by 'factor'.""" + """ + Round number to nearest multiple of factor. + + Args: + number: Input number to round + factor: Rounding factor + + Returns: + int: Rounded number + """ return round(number / factor) * factor def ceil_by_factor(number: int, factor: int) -> int: - """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'.""" + """ + Round number up to nearest multiple of factor. + + Args: + number: Input number to round + factor: Rounding factor + + Returns: + int: Rounded number + """ return math.ceil(number / factor) * factor def floor_by_factor(number: int, factor: int) -> int: - """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'.""" + """ + Round number down to nearest multiple of factor. + + Args: + number: Input number to round + factor: Rounding factor + + Returns: + int: Rounded number + """ return math.floor(number / factor) * factor @@ -89,13 +115,21 @@ def smart_resize( max_ratio: int = 200 ): """ - Rescales the image so that the following conditions are met: - - 1. Both dimensions (height and width) are divisible by 'factor'. - - 2. The total number of pixels is within the range ['min_pixels', 'max_pixels']. - - 3. The aspect ratio of the image is maintained as closely as possible. + Smart image resizing that maintains aspect ratio and respects constraints. + + Args: + height: Original image height + width: Original image width + factor: Patch size factor + min_pixels: Minimum allowed pixels + max_pixels: Maximum allowed pixels + max_ratio: Maximum allowed aspect ratio + + Returns: + tuple: (new_height, new_width) + + Raises: + ValueError: If calculated dimensions are invalid """ if max(height, width) / min(height, width) > max_ratio: if height > width: @@ -132,7 +166,13 @@ def smart_resize( def is_scaled_image(image: np.ndarray) -> bool: """ - Checks to see whether the pixel values have already been rescaled to [0, 1]. + Check if image pixel values are already normalized to [0, 1] range. 
+ + Args: + image: Input image array + + Returns: + bool: True if image is already scaled """ if image.dtype == np.uint8: return False @@ -142,33 +182,11 @@ def is_scaled_image(image: np.ndarray) -> bool: class ImageProcessor(BaseImageProcessor): - r""" - Constructs a adaptive image processor that dynamically resizes images based on the original images. - - Args: - resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`): - Resampling filter to use when resizing the image. - do_rescale (`bool`, *optional*, defaults to `True`): - Whether to rescale the image by the specified scale `rescale_factor`. - rescale_factor (`int` or `float`, *optional*, defaults to `1/255`): - Scale factor to use if rescaling the image. - do_normalize (`bool`, *optional*, defaults to `True`): - Whether to normalize the image. - image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`): - Mean to use if normalizing the image. This is a float or list of floats for each channel in the image. - image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`): - Standard deviation to use if normalizing the image. This is a float or list of floats for each channel - in the image. - min_pixels (`int`, *optional*, defaults to `56 * 56`): - The min pixels of the image to resize the image. - max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`): - The max pixels of the image to resize the image. - patch_size (`int`, *optional*, defaults to 14): - The spacial patch size of the vision encoder. - temporal_patch_size (`int`, *optional*, defaults to 2): - The temporal conv size in resampler. - merge_size (`int`, *optional*, defaults to 2): - The merge size of the vision encoder to llm encoder. + """ + Adaptive image processor for dynamic image resizing and preprocessing. + + This processor handles image resizing, rescaling, normalization and format conversion. + It dynamically adjusts image dimensions based on original size and specified constraints. """ def __init__( @@ -186,7 +204,23 @@ def __init__( resample: PILImageResampling = PILImageResampling.BICUBIC, **kwargs, ) -> None: - """init""" + """ + Initialize image processor with configuration parameters. + + Args: + patch_size (int): Spatial patch size for vision encoder + merge_size (int): Merge size between vision and LLM encoders + temporal_patch_size (int): Temporal patch size for video processing + min_pixels (int): Minimum allowed pixels in resized image + max_pixels (int): Maximum allowed pixels in resized image + image_mean (float/list): Mean values for normalization per channel + image_std (float/list): Std values for normalization per channel + rescale_factor (float): Scaling factor for pixel values (default 1/255) + do_rescale (bool): Whether to rescale images + do_normalize (bool): Whether to normalize images + resample: Resampling method for image resizing + **kwargs: Additional base class arguments + """ super().__init__(**kwargs) self.patch_size = patch_size self.merge_size = merge_size @@ -218,39 +252,25 @@ def _preprocess( input_data_format: Optional[Union[str, ChannelDimension]], ): """ - Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`. - + Internal method for image preprocessing pipeline. + Args: - images (`ImageInput`): - Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. - If pixel values range from 0 to 1, set `do_rescale=False`. 
- vision_info (`List[Dict]`, *optional*): - Optional list of dictionaries containing additional information about vision inputs. - resample (`PILImageResampling`, *optional*, defaults to `self.resample`): - Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums. - do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): - Whether to rescale the image. - rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): - Scale factor to use if rescaling the image. - do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): - Whether to normalize the image. - image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): - Mean to use if normalizing the image. - Can be a float or a list of floats corresponding to the number of channels in the image. - image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): - Standard deviation to use if normalizing the image. - Can be a float or a list of floats corresponding to the number of channels in the image. - data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`): - The channel dimension format for the output image. Can be one of: - - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - - Unset: Use the channel dimension format of the input image. - input_data_format (`ChannelDimension` or `str`, *optional*): - The channel dimension format for the input image. Can be one of: - - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. - - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. + images: Input image or batch of images + min_pixels: Minimum allowed pixels in output + max_pixels: Maximum allowed pixels in output + image_mean: Normalization mean values + image_std: Normalization std values + rescale_factor: Pixel value scaling factor + do_rescale: Whether to rescale pixel values + do_normalize: Whether to normalize pixel values + resample: Resampling method + data_format: Output channel format + input_data_format: Input channel format + + Returns: + tuple: (flatten_patches, grid_dimensions) + - flatten_patches: Flattened image patches + - grid_dimensions: Grid dimensions [t, h, w] """ images = make_list_of_images(images) @@ -266,11 +286,12 @@ def _preprocess( # We assume that all images have the same channel dimension format. 
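        # Editor's sketch (illustration only, not part of this patch): smart_resize
        # snaps both sides to multiples of patch_size * merge_size (14 * 2 = 28 with
        # the defaults above) while keeping the pixel count inside
        # [min_pixels, max_pixels]. Assuming a 531 x 800 input that already satisfies
        # the pixel bounds:
        #   resized_height = round(531 / 28) * 28 = 532
        #   resized_width  = round(800 / 28) * 28 = 812
        # giving a (532 / 14) x (812 / 14) = 38 x 58 grid of 14 x 14 patches.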
input_data_format = infer_channel_dimension_format(images[0]) + # Get original dimensions and calculate optimal resize dimensions height, width = get_image_size(images[0], channel_dim=input_data_format) resized_height, resized_width = smart_resize( height, width, - factor=self.patch_size * self.merge_size, + factor=self.patch_size * self.merge_size, # Combine patch and merge factors min_pixels=min_pixels, max_pixels=max_pixels, ) @@ -278,8 +299,9 @@ def _preprocess( processed_images = [] for image in images: if height != resized_height or width != resized_width: - image = image.astype("uint8") # TODO : 需要手动加上,否则多除255 导致结果会出错 - # 直接fromarray,不要靠paddleformers里面的 + # Convert to uint8 before resizing to avoid double scaling + image = image.astype("uint8") + # Convert to PIL Image and resize image = Image.fromarray(image) image = resize( image, @@ -289,9 +311,10 @@ def _preprocess( ) if do_rescale and do_normalize: + # Adjust mean and std for combined rescale+normalize image_mean = np.array(image_mean, dtype=np.float32) * (1.0 / rescale_factor) image_std = np.array(image_std, dtype=np.float32) * (1.0 / rescale_factor) - do_rescale = False + do_rescale = False # Skip separate rescale step if do_rescale: image = image.astype("float32") @@ -309,15 +332,21 @@ def _preprocess( image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) # [C, H, W] processed_images.append(image) + # Convert processed images to numpy array patches = np.array(processed_images) + + # Pad temporal dimension if needed if patches.shape[0] % self.temporal_patch_size != 0: repeats = np.repeat( - patches[-1][np.newaxis], self.temporal_patch_size - (patches.shape[0] % self.temporal_patch_size), axis=0 + patches[-1][np.newaxis], + self.temporal_patch_size - (patches.shape[0] % self.temporal_patch_size), + axis=0 ) patches = np.concatenate([patches, repeats], axis=0) + # Convert to channels-first format if needed if data_format == ChannelDimension.LAST: - patches = patches.transpose([0, 3, 1, 2]) + patches = patches.transpose([0, 3, 1, 2]) # [N, H, W, C] -> [N, C, H, W] grid_t, channel = patches.shape[:2] grid_t = grid_t // self.temporal_patch_size @@ -326,6 +355,7 @@ def _preprocess( resized_height // self.patch_size, resized_width // self.patch_size, ) + # Reshape into hierarchical patch structure patches = patches.reshape( [ grid_t, @@ -339,7 +369,8 @@ def _preprocess( self.patch_size, ] ) - # [grid_t, temporal_patch_size, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, psz, psz] + # Reorder dimensions for better memory access pattern + # [grid_t, grid_h/merge_size, grid_w/merge_size, merge_size, merge_size, C, temporal_patch_size, psz, psz] patches = patches.transpose([0, 3, 6, 4, 7, 2, 1, 5, 8]) flatten_patches = patches.reshape( @@ -367,47 +398,29 @@ def preprocess( input_data_format: Optional[Union[str, ChannelDimension]] = ChannelDimension.LAST, ): """ + Main preprocessing method for images/videos. + Args: - images (`ImageInput`): - Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If - passing in images with pixel values between 0 and 1, set `do_rescale=False`. - videos (`VideoInput`): - Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If - passing in videos with pixel values between 0 and 1, set `do_rescale=False`. - size (`Dict[str, int]`, *optional*, defaults to `self.size`): - Size of the image after resizing. 
Shortest edge of the image is resized to size["shortest_edge"], with - the longest edge resized to keep the input aspect ratio. - resample (`int`, *optional*, defaults to `self.resample`): - Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only - has an effect if `do_resize` is set to `True`. - do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): - Whether to rescale the image. - rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): - Rescale factor to rescale the image by if `do_rescale` is set to `True`. - do_normalize (`bool`, *optional*, defaults to `self.do_normalize`): - Whether to normalize the image. - image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`): - Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`. - image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`): - Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to - `True`. - return_tensors (`str` or `TensorType`, *optional*): - The type of tensors to return. Can be one of: - - Unset: Return a list of `np.ndarray`. - - `TensorType.PADDLE` or `'pt'`: Return a batch of type `torch.Tensor`. - - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`. - data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`): - The channel dimension format for the output image. Can be one of: - - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - - Unset: Use the channel dimension format of the input image. - input_data_format (`ChannelDimension` or `str`, *optional*): - The channel dimension format for the input image. If unset, the channel dimension format is inferred - from the input image. Can be one of: - - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. - - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. - - `"none"` or `ChannelDimension.NONE`: image in (height, width) format. - + images: Input image/video data + min_pixels: Override for minimum pixels + max_pixels: Override for maximum pixels + image_mean: Override for normalization mean + image_std: Override for normalization std + rescale_factor: Override for rescaling factor + do_rescale: Override for rescaling flag + do_normalize: Override for normalization flag + resample: Override for resampling method + return_tensors: Desired output tensor format + data_format: Output channel dimension format + input_data_format: Input channel dimension format + + Returns: + BatchFeature: Processed features containing: + - pixel_values: Preprocessed pixel data + - grid_thw: Grid dimensions [temporal, height, width] + + Raises: + ValueError: For invalid image types or dimensions """ min_pixels = min_pixels if min_pixels is not None else self.min_pixels max_pixels = max_pixels if max_pixels is not None else self.max_pixels diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py index 70fd5c1d10..44676045a7 100644 --- a/fastdeploy/input/qwen_mm_processor/process.py +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -15,7 +15,7 @@ # limitations under the License. 
""" -""" process.py """ + from typing import Any, Dict, List, Union import numpy as np from PIL import Image @@ -30,8 +30,20 @@ class DataProcessor: """ - Processes multimodal chat messages into model-ready inputs, - handling text, images, and videos with 3D positional embeddings. + Processes multimodal inputs (text, images, videos) into model-ready formats. + + Handles: + - Tokenization of text with special tokens for visual content + - Image and video preprocessing + - Generation of 3D positional embeddings + - Conversion of chat messages to model inputs + + Attributes: + tokenizer: Text tokenizer instance + image_processor: Image/video preprocessor + image_token: Special token for image placeholders + video_token: Special token for video placeholders + vision_start: Token marking start of visual content """ def __init__( @@ -42,13 +54,23 @@ def __init__( tokens_per_second: int = 2, **kwargs, ) -> None: + """ + Initialize the data processor. + + Args: + model_path: Path to pretrained model + video_min_frames: Minimum frames to sample from videos + video_max_frames: Maximum frames to sample from videos + tokens_per_second: Temporal resolution for positional embeddings + **kwargs: Additional configuration + """ self.min_frames = video_min_frames self.max_frames = video_max_frames - # Tokenizer and image preprocessor + # Initialize tokenizer with left padding and fast tokenizer self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) - self.tokenizer.ignored_index = -100 - self.image_processor = ImageProcessor.from_pretrained(model_path) + self.tokenizer.ignored_index = -100 # Set ignored index for loss calculation + self.image_processor = ImageProcessor.from_pretrained(model_path) # Initialize image processor # Convolution sizes for patch aggregation self.spatial_conv_size = self.image_processor.merge_size @@ -75,8 +97,21 @@ def __init__( def text2ids(self, text, images=None, videos=None): """ - Convert chat text into model inputs. - Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. + Convert text with image/video placeholders into model inputs. 
+ + Args: + text: Input text with <|image@placeholder|> and <|video@placeholder|> markers + images: List of PIL Images corresponding to image placeholders + videos: List of video data corresponding to video placeholders + + Returns: + Dict containing: + - input_ids: Token IDs + - token_type_ids: Type identifiers (text/image/video) + - position_ids: 3D positional embeddings + - images: Preprocessed visual features + - grid_thw: Spatial/temporal dimensions + - image_type_ids: Visual content type (0=image, 1=video) """ outputs = { @@ -92,17 +127,21 @@ def text2ids(self, text, images=None, videos=None): "video_cnt": 0, } + # Define placeholders and their lengths IMAGE_PLACEHOLDER = "<|image@placeholder|>" - VIDEO_PLACEHOLDER = "<|video@placeholder|>" + VIDEO_PLACEHOLDER = "<|video@placeholder|>" IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER) VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER) - st, image_idx, video_idx = 0, 0, 0 + + # Initialize tracking variables for text parsing + st, image_idx, video_idx = 0, 0, 0 # Start position, image counter, video counter while st < len(text): + # Find next image or video placeholder in text image_pos = text.find(IMAGE_PLACEHOLDER, st) - image_pos = len(text) if image_pos == -1 else image_pos + image_pos = len(text) if image_pos == -1 else image_pos # Set to end if not found video_pos = text.find(VIDEO_PLACEHOLDER, st) - video_pos = len(text) if video_pos == -1 else video_pos - ed = min(image_pos, video_pos) + video_pos = len(text) if video_pos == -1 else video_pos # Set to end if not found + ed = min(image_pos, video_pos) # End position is first placeholder found self._add_text(text[st:ed], outputs) if ed == len(text): @@ -129,8 +168,16 @@ def request2ids( self, request: Dict[str, Any], tgts: List[str] = None ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]: """ - Convert chat messages into model inputs. - Returns a dict with input_ids, token_type_ids, position_ids, images, grid_thw, image_type_ids, labels. + Convert chat request with multimodal messages into model inputs. 
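As a side note on the scanning loop above: the same splitting logic can be exercised in isolation. The sketch below is standalone and uses a toy helper name, split_on_placeholders, that returns tagged segments instead of building model inputs; it mirrors how text2ids walks the prompt and interleaves text with image/video placeholders.

IMAGE_PLACEHOLDER = "<|image@placeholder|>"
VIDEO_PLACEHOLDER = "<|video@placeholder|>"


def split_on_placeholders(text: str):
    """Return ('text'|'image'|'video', segment) tuples in document order."""
    segments = []
    st = 0
    while st < len(text):
        image_pos = text.find(IMAGE_PLACEHOLDER, st)
        image_pos = len(text) if image_pos == -1 else image_pos
        video_pos = text.find(VIDEO_PLACEHOLDER, st)
        video_pos = len(text) if video_pos == -1 else video_pos
        ed = min(image_pos, video_pos)  # next placeholder (or end of text)
        if text[st:ed]:
            segments.append(("text", text[st:ed]))
        if ed == len(text):
            break
        if ed == image_pos:
            segments.append(("image", IMAGE_PLACEHOLDER))
            st = ed + len(IMAGE_PLACEHOLDER)
        else:
            segments.append(("video", VIDEO_PLACEHOLDER))
            st = ed + len(VIDEO_PLACEHOLDER)
    return segments


print(split_on_placeholders("Describe <|image@placeholder|> then <|video@placeholder|> please."))
# [('text', 'Describe '), ('image', ...), ('text', ' then '), ('video', ...), ('text', ' please.')]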
+ + Args: + request: Dictionary containing: + - messages: List of chat messages with text/image/video content + - request_id: Unique identifier for logging + tgts: Optional target sequences + + Returns: + Dict with same structure as text2ids() output """ outputs = { @@ -146,19 +193,22 @@ def request2ids( "video_cnt": 0, } + # Parse and validate chat messages messages = parse_chat_messages(request.get("messages")) - image_message_list = [] + image_message_list = [] # Store visual content messages + for msg in messages: role = msg.get("role") assert role in self.role_prefixes, f"Unsupported role: {role}" + + # Normalize content to list format content_items = msg.get("content") if not isinstance(content_items, list): content_items = [content_items] + + # Collect all visual content items for item in content_items: - if isinstance(item, dict) and item.get("type") in [ - "image", - "video", - ]: + if isinstance(item, dict) and item.get("type") in ["image", "video"]: image_message_list.append(item) request["messages"] = messages @@ -187,16 +237,6 @@ def request2ids( if video_bytes is None: continue frames, meta = self._load_and_process_video(video_bytes, image_message) - # ----------- - # from fastdeploy.entrypoints.chat_utils import MultiModalPartParser - # mock_frames = [] - # mm_parser = MultiModalPartParser() - # fimg = mm_parser.parse_image("file:///home/liudongdong/github/llm/data/images/demo.jpeg") - # for i in range(frames.shape[0]): - # mock_frames.append(fimg.copy()) - # mock_frames = np.stack([np.array(f.convert("RGB")) for f in mock_frames], axis=0) - # meta["fps"] = 3.0 - # frames = mock_frames outputs["video_cnt"] += 1 self._add_video(frames, meta, outputs) @@ -207,6 +247,17 @@ def request2ids( return outputs def _add_text(self, tokens, outputs: Dict) -> None: + """ + Add text tokens to model inputs dictionary. + + Args: + tokens: Text string or already tokenized IDs + outputs: Dictionary accumulating model inputs + + Note: + - Handles both raw text and pre-tokenized inputs + - Updates position IDs for 3D embeddings + """ if isinstance(tokens, str): tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] @@ -218,12 +269,34 @@ def _add_text(self, tokens, outputs: Dict) -> None: outputs["cur_position"] = position_ids.max() + 1 def _compute_text_positions(self, start_pos: int, num_tokens: int) -> np.ndarray: + """ + Generate 3D positional embeddings for text tokens. + + Args: + start_pos: Starting position index + num_tokens: Number of tokens to generate positions for + + Returns: + numpy.ndarray: 3D position IDs shaped (3, num_tokens) + """ text_array = np.arange(num_tokens).reshape(1, -1) text_index = np.broadcast_to(text_array, (3, num_tokens)) position = text_index + start_pos return position def _add_image(self, img, outputs: Dict) -> None: + """ + Add image data to model inputs dictionary. + + Args: + img: PIL Image to process + outputs: Dictionary accumulating model inputs + + Note: + - Preprocesses image and calculates spatial dimensions + - Adds image token IDs and type markers + - Generates appropriate position embeddings + """ ret = self.image_processor.preprocess(images=[img.convert("RGB")]) num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 grid_thw = ret["grid_thw"].tolist() @@ -242,6 +315,18 @@ def _add_image(self, img, outputs: Dict) -> None: outputs["cur_position"] = position_ids.max() + 1 def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: + """ + Add video data to model inputs dictionary. 
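For orientation, the number of LLM tokens contributed by one image follows directly from the preprocessor's grid_thw and the spatial merge size, exactly as in num_tokens = ret["grid_thw"].prod() // merge_size**2 above. A worked example; the grid values are illustrative, not taken from a real image.

import numpy as np

# Hypothetical grid for one image: 1 temporal step, 28 x 28 spatial patches.
grid_thw = np.array([1, 28, 28])
merge_size = 2  # spatial merge between vision encoder and LLM

# Each merge_size x merge_size block of patches collapses into one LLM token.
num_tokens = grid_thw.prod() // merge_size**2
print(num_tokens)  # 196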
+ + Args: + frames: Video frames as numpy array + meta: Video metadata containing fps/duration + outputs: Dictionary accumulating model inputs + + Note: + - Handles temporal dimension in position embeddings + - Uses video-specific token IDs and type markers + """ ret = self.image_processor.preprocess(images=frames) num_tokens = ret["grid_thw"].prod() // self.image_processor.merge_size**2 @@ -263,6 +348,19 @@ def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: outputs["cur_position"] = position_ids.max() + 1 def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: + """ + Generate 3D positional embeddings for visual content. + + Args: + start_pos: Starting position index + t: Temporal dimension (frames) + h: Height in patches + w: Width in patches + second_per_grid_t: Seconds per temporal grid + + Returns: + numpy.ndarray: 3D position IDs shaped (3, t*h*w) + """ h //= self.spatial_conv_size w //= self.spatial_conv_size @@ -281,6 +379,18 @@ def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, seco return position def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: + """ + Load and preprocess video into frames. + + Args: + url: Video file path or bytes + item: Dictionary containing processing parameters + + Returns: + tuple: (frames, metadata) where: + - frames: Processed video frames as numpy array + - metadata: Updated video metadata dictionary + """ reader, meta = read_video_decord(url) frames = [] @@ -290,51 +400,61 @@ def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: frames.append(image) frames = np.stack([np.array(f.convert("RGB")) for f in frames], axis=0) + # Apply frame sampling if fps or target_frames specified fps = item.get("fps", None) num_frames = item.get("target_frames", None) + if fps is not None or num_frames is not None: + # Get frame sampling constraints min_frames = item.get("min_frames", self.min_frames) max_frames = item.get("max_frames", self.max_frames) - frames = sample_frames(video=frames, - frame_factor=self.temporal_conv_size, - min_frames=min_frames, - max_frames=max_frames, - metadata=meta, - fps=fps, - num_frames=num_frames) + # Sample frames according to specifications + frames = sample_frames( + video=frames, + frame_factor=self.temporal_conv_size, # Ensure divisible by temporal patch size + min_frames=min_frames, + max_frames=max_frames, + metadata=meta, + fps=fps, + num_frames=num_frames + ) + + # Update metadata with new frame count and fps meta["num_of_frame"] = frames.shape[0] if fps is not None: - meta["fps"] = fps + meta["fps"] = fps # Use specified fps else: - meta["fps"] = frames.shape[0] / meta["duration"] + meta["fps"] = frames.shape[0] / meta["duration"] # Calculate fps from sampled frames return frames, meta def apply_chat_template(self, request): """ - Convert multi-turn messages into ID sequences. - + Apply chat template to convert messages into token sequence. 
+ Args: - messages: Either a request dict containing 'messages' field, - or a list of message dicts directly - + request: Dictionary containing chat messages + Returns: - List of token IDs as strings (converted from token objects) + List of token IDs + + Raises: + ValueError: If model doesn't support chat templates """ if self.tokenizer.chat_template is None: raise ValueError("This model does not support chat_template.") - prompt_token_str = self.tokenizer.apply_chat_template( + raw_prompt = self.tokenizer.apply_chat_template( request["messages"], tokenize=False, add_generation_prompt=request.get("add_generation_prompt", True), ) - prompt_token_str = prompt_token_str.replace(self.image_token, "").replace(self.video_token, "") + prompt_token_str = raw_prompt.replace(self.image_token, "").replace(self.video_token, "") tokens = self.tokenizer.tokenize(prompt_token_str) token_ids = self.tokenizer.convert_tokens_to_ids(tokens) data_processor_logger.info( - f"req_id:{request.get('request_id', ''), } tokens: {tokens}, token_ids: {token_ids}" + f"req_id:{request.get('request_id', ''), } prompt: {raw_prompt} tokens: {tokens}, token_ids: {token_ids}" ) return token_ids diff --git a/fastdeploy/input/qwen_mm_processor/process_video.py b/fastdeploy/input/qwen_mm_processor/process_video.py index 9dd79cf97f..f2ee44ab6f 100644 --- a/fastdeploy/input/qwen_mm_processor/process_video.py +++ b/fastdeploy/input/qwen_mm_processor/process_video.py @@ -34,19 +34,43 @@ def is_gif(data: bytes) -> bool: """ - check if a bytes is a gif based on the magic head + Check if given bytes data is a GIF file by examining magic number. + + Args: + data: Binary data to check + + Returns: + bool: True if data is a GIF file (GIF87a or GIF89a format) """ return data[:6] in (b"GIF87a", b"GIF89a") class VideoReaderWrapper(decord.VideoReader): """ - Solving memory leak bug - - https://github.com/dmlc/decord/issues/208 + Wrapper around decord.VideoReader to handle GIF files and fix memory leaks. + + This wrapper converts GIF inputs to MP4 format to work around decord's limitations, + and implements proper cleanup to prevent memory leaks (https://github.com/dmlc/decord/issues/208). + + Attributes: + original_file (str): Path to the original video file (for cleanup) """ def __init__(self, video_path, *args, **kwargs): + """ + Initialize the video reader wrapper. + + Args: + video_path: Can be one of: + - str: Path to video file + - bytes: Raw video bytes + - io.BytesIO: Video data stream + *args: Additional arguments for decord.VideoReader + **kwargs: Additional keyword arguments for decord.VideoReader + + Note: + Automatically converts GIF files to MP4 format for compatibility. + """ with ntf(delete=True, suffix=".gif") as gif_file: gif_input = None self.original_file = None @@ -67,40 +91,82 @@ def __init__(self, video_path, *args, **kwargs): gif_input = gif_file.name if gif_input is not None: + # Convert GIF to MP4 for decord compatibility clip = mp.VideoFileClip(gif_input) mp4_file = ntf(delete=False, suffix=".mp4") clip.write_videofile(mp4_file.name, verbose=False, logger=None) clip.close() video_path = mp4_file.name - self.original_file = video_path + self.original_file = video_path # Store path for cleanup super().__init__(video_path, *args, **kwargs) self.seek(0) def __getitem__(self, key): + """ + Get video frames by index/slice and reset reader position. 
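The GIF detection above relies only on the file's magic number, so it can be checked without decord or moviepy. A minimal standalone check; the byte strings are illustrative.

def is_gif(data: bytes) -> bool:
    # Same magic-number test as process_video.is_gif
    return data[:6] in (b"GIF87a", b"GIF89a")


print(is_gif(b"GIF89a" + b"\x00" * 16))  # True
print(is_gif(b"\x89PNG\r\n\x1a\n"))      # False (PNG signature)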
+ + Args: + key: Index or slice of frames to retrieve + + Returns: + decord.ndarray.NDArray: Requested video frames + + Note: + Resets read position to start after frame retrieval + """ frames = super().__getitem__(key) self.seek(0) return frames def __del__(self): + """ + Clean up temporary files when object is destroyed. + + Note: + Removes any temporary MP4 files created from GIF conversions + """ if self.original_file and os.path.exists(self.original_file): os.remove(self.original_file) def read_video_decord(video_path): - """get reader and meta by decord""" + """ + Read video file using decord video reader and get metadata. + + Args: + video_path: Can be one of: + - str: Path to video file + - bytes: Raw video bytes + - io.BytesIO: Video data stream + - VideoReaderWrapper: Existing video reader instance + + Returns: + tuple: (video_reader, video_meta) where: + - video_reader: VideoReaderWrapper instance + - video_meta: Dictionary containing: + - fps: Frames per second + - duration: Video duration in seconds + - num_of_frame: Total number of frames + """ if isinstance(video_path, VideoReaderWrapper): - video_reader = video_path + video_reader = video_path # Reuse existing reader if provided else: if isinstance(video_path, bytes): - video_path = io.BytesIO(video_path) + video_path = io.BytesIO(video_path) # Convert bytes to BytesIO video_reader = VideoReaderWrapper(video_path, num_threads=1) + # Extract video metadata vlen = len(video_reader) fps = video_reader.get_avg_fps() duration = vlen / float(fps) - video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen} + # Package metadata + video_meta = { + "fps": fps, # Frames per second + "duration": duration, # Total duration in seconds + "num_of_frame": vlen # Total frame count + } return video_reader, video_meta @@ -113,6 +179,26 @@ def sample_frames( fps: Optional[Union[int, float]] = None, num_frames: Optional[int] = None, ): + """ + Sample frames from video according to specified criteria. + + Args: + video: Input video frames as numpy array + frame_factor: Ensure sampled frames are multiples of this factor + min_frames: Minimum number of frames to sample + max_frames: Maximum number of frames to sample + metadata: Video metadata containing fps information + fps: Target frames per second for sampling + num_frames: Exact number of frames to sample + + Returns: + np.ndarray: Sampled video frames + + Raises: + ValueError: If both fps and num_frames are specified, + or if required metadata is missing, + or if requested frames exceed available frames + """ if fps is not None and num_frames is not None: raise ValueError("`num_frames` and `fps` are mutually exclusive arguments, please use only one!") @@ -141,10 +227,15 @@ def sample_frames( "Decrease `num_frames` or `fps` for sampling." 
) + # Calculate frame indices based on sampling strategy if num_frames is not None: + # Evenly spaced sampling for target frame count indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype("int") else: + # Keep all frames if no sampling requested indices = np.arange(0, total_num_frames).astype("int") + + # Apply frame selection video = video[indices] return video diff --git a/fastdeploy/input/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor.py index 25a2b4682a..71a40076f7 100644 --- a/fastdeploy/input/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor.py @@ -24,7 +24,21 @@ class QwenVLProcessor(ErnieProcessor): - """The processor class for ERNIE MoE VL models.""" + """ + Processor for Qwen Vision-Language models that handles multimodal inputs. + + Inherits from ErnieProcessor and extends functionality for: + - Image and video processing + - Multimodal request handling + - Generation configuration + + Attributes: + ernie_processor: Underlying DataProcessor instance + tokenizer: Text tokenizer + generation_config: Model generation configuration + eos_token_ids: End-of-sequence token IDs + limit_mm_per_prompt: Limits for multimodal inputs + """ def __init__( self, @@ -34,6 +48,16 @@ def __init__( mm_processor_kwargs=None, reasoning_parser_obj=None, ): + """ + Initialize QwenVLProcessor. + + Args: + config: Model configuration + model_name_or_path: Path to pretrained model + limit_mm_per_prompt: Limits for multimodal inputs per prompt + mm_processor_kwargs: Additional kwargs for multimodal processor + reasoning_parser_obj: Optional reasoning parser + """ data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) @@ -45,14 +69,14 @@ def __init__( self._load_tokenizer() self.decode_status = dict() - # Generation config + # Load generation config if available try: self.generation_config = GenerationConfig.from_pretrained(model_name_or_path) except Exception as e: data_processor_logger.warning( f"Can't find generation config: {e}, so it will not use generation_config field in the model config" ) - self.generation_config = None + self.generation_config = None # Fallback to None if config not found from paddleformers.trl.llm_utils import get_eos_token_id @@ -65,15 +89,20 @@ def __init__( self.reasoning_parser = reasoning_parser_obj(self.tokenizer) def get_pad_id(self): - """get pad id""" + """ + Get the padding token ID. + + Returns: + int: Padding token ID + """ return self.tokenizer.pad_token_id def _load_tokenizer(self): """ - load tokenizer - + Load and initialize the tokenizer. + Returns: - tokenizer (AutoTokenizer) + AutoTokenizer: Initialized tokenizer instance """ self.tokenizer = self.ernie_processor.tokenizer @@ -99,7 +128,17 @@ def set_value(req, key, value): return request def process_request(self, request, max_model_len=None, **kwargs): - """process the input data""" + """ + Process incoming request into model inputs. + + Args: + request: Input request object + max_model_len: Maximum model context length + **kwargs: Additional processing arguments + + Returns: + Request: Processed request with model inputs + """ task = request.to_dict() task["enable_thinking"] = kwargs.get("enable_thinking", False) self.process_request_dict(task, max_model_len) @@ -108,7 +147,18 @@ def process_request(self, request, max_model_len=None, **kwargs): return request def _parse_processor_kwargs(self, kwargs): - """解析多模态处理器参数配置""" + """ + Parse and validate multimodal processor kwargs. 
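Returning briefly to sample_frames above: when an exact num_frames is requested, the kept indices are evenly spaced over the decoded frames. A worked example with illustrative numbers.

import numpy as np

total_num_frames = 100  # hypothetical decoded frame count
num_frames = 8          # target frame count after sampling

# Same even-spacing rule as sample_frames() when num_frames is given.
indices = np.arange(0, total_num_frames, total_num_frames / num_frames).astype("int")
print(indices)  # [ 0 12 25 37 50 62 75 87]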
+ + Args: + kwargs: Input kwargs dictionary + + Returns: + dict: Validated processor kwargs + + Raises: + ValueError: If kwargs format is invalid + """ if not kwargs: return {} @@ -116,11 +166,11 @@ def _parse_processor_kwargs(self, kwargs): if not isinstance(kwargs, dict): raise ValueError("mm-processor-kwargs must be a dictionary") - # 验证参数类型 - data_processor_logger.info(f"kwargs:{kwargs}") + # Validate kwargs types against expected schema + data_processor_logger.info(f"Processing kwargs: {kwargs}") expected_types = { - "video_max_frames": int, - "video_min_frames": int, + "video_max_frames": int, # Maximum video frames parameter + "video_min_frames": int, # Minimum video frames parameter } for key, value in kwargs.items(): @@ -136,7 +186,18 @@ def _parse_processor_kwargs(self, kwargs): return {} def _parse_limits(self, limits): - """解析多模态限制配置""" + """ + Parse and validate multimodal input limits. + + Args: + limits: Input limits dictionary + + Returns: + dict: Validated limits with defaults + + Raises: + ValueError: If limits format is invalid + """ DEFAULT_LIMITS = {"image": 1, "video": 1, "audio": 1} if not limits: @@ -152,6 +213,15 @@ def _parse_limits(self, limits): return DEFAULT_LIMITS def _check_mm_limits(self, item): + """ + Validate multimodal inputs against configured limits. + + Args: + item: Input request item to check + + Raises: + ValueError: If input exceeds configured limits + """ if isinstance(item, dict): # 请求包含prompt和multi_modal_data mm_data = item @@ -174,7 +244,19 @@ def _check_mm_limits(self, item): raise ValueError(f"Too many {modality} items in prompt, " f"got {len(data)} but limit is {limit}") def process_request_dict(self, request, max_model_len=None): - """process the input data""" + """ + Process request dictionary into model inputs. 
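Both validation helpers above operate on plain dictionaries. The values below are purely illustrative (the built-in defaults are image/video/audio = 1, and the frame bounds come from the DataProcessor constructor), but they show shapes that _parse_processor_kwargs and _parse_limits accept.

# Illustrative configuration only, not defaults.
mm_processor_kwargs = {
    "video_min_frames": 4,    # must be int, per expected_types
    "video_max_frames": 768,  # must be int, per expected_types
}
limit_mm_per_prompt = {"image": 2, "video": 1, "audio": 1}
print(mm_processor_kwargs, limit_mm_per_prompt)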
+ + Args: + request: Input request dictionary + max_model_len: Maximum model context length + + Returns: + dict: Processed request with model inputs + + Raises: + ValueError: If request format is invalid + """ request = self._apply_default_parameters(request) if not request.get("eos_token_ids"): @@ -202,7 +284,7 @@ def process_request_dict(self, request, max_model_len=None): raise ValueError(f"Request must contain 'prompt', or 'messages': {request}") metadata = request.get("metadata") - # 如果metadata包含之前输出的token,将这些token添加到input_ids末尾 + # Handle continuation of previous generation by appending existing tokens if metadata and metadata.get("generated_token_ids"): self.append_generated_tokens(outputs, metadata["generated_token_ids"]) outputs = self.pack_outputs(outputs) @@ -210,17 +292,25 @@ def process_request_dict(self, request, max_model_len=None): request["prompt_token_ids_len"] = len(request["prompt_token_ids"]) request["multimodal_inputs"] = outputs - # 截断超过长度限制的prompt + # Handle prompt truncation if exceeds model context length if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len: - request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] + request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] # Leave space for at least 1 new token + + # Set default max_tokens if not specified if request.get("max_tokens") is None: - request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) + request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) # Ensure at least 1 token data_processor_logger.info(f"Processed request {request}") return request def append_generated_tokens(self, multimodal_inputs, generated_token_ids): - "append already generated tokens" + """ + Append previously generated tokens to inputs. + + Args: + multimodal_inputs: Current model inputs + generated_token_ids: Tokens to append + """ num_tokens = len(generated_token_ids) multimodal_inputs["input_ids"].extend(generated_token_ids) @@ -232,34 +322,45 @@ def append_generated_tokens(self, multimodal_inputs, generated_token_ids): multimodal_inputs["cur_position"] += num_tokens def pack_outputs(self, outs): - # Stack or nullify image-related fields + """ + Convert and package model outputs into standardized format. 
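The truncation and max_tokens defaulting above reduce to two lines of arithmetic. A toy example; the token IDs and lengths are placeholders.

max_model_len = 8
prompt_token_ids = list(range(12))  # 12 tokens, longer than the context

if len(prompt_token_ids) > max_model_len:
    prompt_token_ids = prompt_token_ids[: max_model_len - 1]  # keep room for 1 new token

max_tokens = max(1, max_model_len - len(prompt_token_ids))
print(len(prompt_token_ids), max_tokens)  # 7 1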
+ + Args: + outs: Raw model outputs + + Returns: + dict: Packaged outputs with proper types and shapes + """ + # Process visual outputs - stack if exists or set to None if empty if not outs["images"]: - outs["images"] = None - outs["grid_thw"] = None - outs["image_type_ids"] = None + outs["images"] = None # No images case + outs["grid_thw"] = None # No spatial dimensions + outs["image_type_ids"] = None # No type IDs else: - outs["images"] = np.vstack(outs["images"]) - outs["grid_thw"] = np.vstack(outs["grid_thw"]) - outs["image_type_ids"] = np.array(outs["image_type_ids"]) + outs["images"] = np.vstack(outs["images"]) # Stack image features vertically + outs["grid_thw"] = np.vstack(outs["grid_thw"]) # Stack spatial dimensions + outs["image_type_ids"] = np.array(outs["image_type_ids"]) # Convert to numpy array outs["image_patch_id"] = self.ernie_processor.image_token_id outs["video_patch_id"] = self.ernie_processor.video_token_id - # Convert lists to arrays - outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) - outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) - outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) + # Convert all outputs to numpy arrays with appropriate types + outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) # Token IDs as int64 + outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) # Type IDs as int64 + outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1) # Concatenate position IDs return outs def process_response_dict(self, response_dict, stream, **kwargs): """ - Preprocess the response - + Process model response into final output format. + Args: - response_dict (Dict): response for engine, contain ids fields - + response_dict: Raw model response + stream: Whether response is streaming + **kwargs: Additional processing arguments + Returns: - Dict: response contain text fields + dict: Processed response """ enable_thinking = kwargs.pop("enable_thinking", True) if enable_thinking is None: @@ -271,7 +372,13 @@ def process_response_dict(self, response_dict, stream, **kwargs): def update_stop_seq(self, stop_sequences): """ - Update stop sequences from request. + Update stop sequences for generation. + + Args: + stop_sequences: Stop sequences to process + + Returns: + tuple: (stop_seqs, stop_seqs_len) processed sequences """ stop_seqs = [] if isinstance(stop_sequences, str): From 28f97b037d74bd8e9a088a54580b0812e1fb9533 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Fri, 15 Aug 2025 17:12:39 +0800 Subject: [PATCH 11/16] formatted --- .../input/qwen_mm_processor/__init__.py | 2 +- .../qwen_mm_processor/image_processor.py | 68 +++++++--------- fastdeploy/input/qwen_mm_processor/process.py | 81 ++++++++++--------- .../input/qwen_mm_processor/process_video.py | 47 ++++++----- fastdeploy/input/qwen_vl_processor.py | 56 ++++++------- 5 files changed, 123 insertions(+), 131 deletions(-) diff --git a/fastdeploy/input/qwen_mm_processor/__init__.py b/fastdeploy/input/qwen_mm_processor/__init__.py index 7cc194dd68..5a97e41863 100644 --- a/fastdeploy/input/qwen_mm_processor/__init__.py +++ b/fastdeploy/input/qwen_mm_processor/__init__.py @@ -14,7 +14,7 @@ # limitations under the License. 
""" -from .process import DataProcessor, IDS_TYPE_FLAG +from .process import IDS_TYPE_FLAG, DataProcessor __all__ = [ "DataProcessor", diff --git a/fastdeploy/input/qwen_mm_processor/image_processor.py b/fastdeploy/input/qwen_mm_processor/image_processor.py index 187c1b35e3..dd0f04b5cf 100644 --- a/fastdeploy/input/qwen_mm_processor/image_processor.py +++ b/fastdeploy/input/qwen_mm_processor/image_processor.py @@ -14,7 +14,6 @@ # limitations under the License. """ - import math from typing import List, Optional, Union @@ -35,7 +34,6 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - make_list_of_images, to_numpy_array, valid_images, @@ -67,11 +65,11 @@ def round_by_factor(number: int, factor: int) -> int: """ Round number to nearest multiple of factor. - + Args: number: Input number to round factor: Rounding factor - + Returns: int: Rounded number """ @@ -81,11 +79,11 @@ def round_by_factor(number: int, factor: int) -> int: def ceil_by_factor(number: int, factor: int) -> int: """ Round number up to nearest multiple of factor. - + Args: number: Input number to round factor: Rounding factor - + Returns: int: Rounded number """ @@ -95,28 +93,21 @@ def ceil_by_factor(number: int, factor: int) -> int: def floor_by_factor(number: int, factor: int) -> int: """ Round number down to nearest multiple of factor. - + Args: number: Input number to round factor: Rounding factor - + Returns: int: Rounded number """ return math.floor(number / factor) * factor -def smart_resize( - height: int, - width: int, - factor: int, - min_pixels: int, - max_pixels: int, - max_ratio: int = 200 -): +def smart_resize(height: int, width: int, factor: int, min_pixels: int, max_pixels: int, max_ratio: int = 200): """ Smart image resizing that maintains aspect ratio and respects constraints. - + Args: height: Original image height width: Original image width @@ -124,10 +115,10 @@ def smart_resize( min_pixels: Minimum allowed pixels max_pixels: Maximum allowed pixels max_ratio: Maximum allowed aspect ratio - + Returns: tuple: (new_height, new_width) - + Raises: ValueError: If calculated dimensions are invalid """ @@ -167,10 +158,10 @@ def smart_resize( def is_scaled_image(image: np.ndarray) -> bool: """ Check if image pixel values are already normalized to [0, 1] range. - + Args: image: Input image array - + Returns: bool: True if image is already scaled """ @@ -184,7 +175,7 @@ def is_scaled_image(image: np.ndarray) -> bool: class ImageProcessor(BaseImageProcessor): """ Adaptive image processor for dynamic image resizing and preprocessing. - + This processor handles image resizing, rescaling, normalization and format conversion. It dynamically adjusts image dimensions based on original size and specified constraints. """ @@ -206,10 +197,10 @@ def __init__( ) -> None: """ Initialize image processor with configuration parameters. - + Args: patch_size (int): Spatial patch size for vision encoder - merge_size (int): Merge size between vision and LLM encoders + merge_size (int): Merge size between vision and LLM encoders temporal_patch_size (int): Temporal patch size for video processing min_pixels (int): Minimum allowed pixels in resized image max_pixels (int): Maximum allowed pixels in resized image @@ -253,7 +244,7 @@ def _preprocess( ): """ Internal method for image preprocessing pipeline. 
- + Args: images: Input image or batch of images min_pixels: Minimum allowed pixels in output @@ -266,7 +257,7 @@ def _preprocess( resample: Resampling method data_format: Output channel format input_data_format: Input channel format - + Returns: tuple: (flatten_patches, grid_dimensions) - flatten_patches: Flattened image patches @@ -300,7 +291,7 @@ def _preprocess( for image in images: if height != resized_height or width != resized_width: # Convert to uint8 before resizing to avoid double scaling - image = image.astype("uint8") + image = image.astype("uint8") # Convert to PIL Image and resize image = Image.fromarray(image) image = resize( @@ -312,8 +303,8 @@ def _preprocess( if do_rescale and do_normalize: # Adjust mean and std for combined rescale+normalize - image_mean = np.array(image_mean, dtype=np.float32) * (1.0 / rescale_factor) - image_std = np.array(image_std, dtype=np.float32) * (1.0 / rescale_factor) + image_mean = np.array(image_mean, dtype=np.float32) * (1.0 / rescale_factor) + image_std = np.array(image_std, dtype=np.float32) * (1.0 / rescale_factor) do_rescale = False # Skip separate rescale step if do_rescale: @@ -334,13 +325,13 @@ def _preprocess( # Convert processed images to numpy array patches = np.array(processed_images) - + # Pad temporal dimension if needed if patches.shape[0] % self.temporal_patch_size != 0: repeats = np.repeat( - patches[-1][np.newaxis], - self.temporal_patch_size - (patches.shape[0] % self.temporal_patch_size), - axis=0 + patches[-1][np.newaxis], + self.temporal_patch_size - (patches.shape[0] % self.temporal_patch_size), + axis=0, ) patches = np.concatenate([patches, repeats], axis=0) @@ -399,7 +390,7 @@ def preprocess( ): """ Main preprocessing method for images/videos. - + Args: images: Input image/video data min_pixels: Override for minimum pixels @@ -413,12 +404,12 @@ def preprocess( return_tensors: Desired output tensor format data_format: Output channel dimension format input_data_format: Input channel dimension format - + Returns: BatchFeature: Processed features containing: - pixel_values: Preprocessed pixel data - grid_thw: Grid dimensions [temporal, height, width] - + Raises: ValueError: For invalid image types or dimensions """ @@ -447,8 +438,5 @@ def preprocess( data_format=data_format, input_data_format=input_data_format, ) - data = { - "pixel_values": pixel_values, - "grid_thw": grid_thw - } + data = {"pixel_values": pixel_values, "grid_thw": grid_thw} return BatchFeature(data=data, tensor_type=return_tensors) diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py index 44676045a7..ee89e4e137 100644 --- a/fastdeploy/input/qwen_mm_processor/process.py +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -15,11 +15,12 @@ # limitations under the License. """ - from typing import Any, Dict, List, Union + import numpy as np -from PIL import Image from paddleformers.transformers import AutoTokenizer +from PIL import Image + from fastdeploy.entrypoints.chat_utils import parse_chat_messages from fastdeploy.input.mm_processor import IDS_TYPE_FLAG from fastdeploy.utils import data_processor_logger @@ -31,13 +32,13 @@ class DataProcessor: """ Processes multimodal inputs (text, images, videos) into model-ready formats. 
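To make the temporal padding and reshaping above concrete, the grid sizes follow from simple arithmetic. The dimensions below are illustrative, and the per-patch vector length assumes the usual channels x temporal_patch_size x patch_size squared flattening.

# Illustrative dimensions only.
temporal_patch_size, patch_size, merge_size, channels = 2, 14, 2, 3
num_frames, resized_height, resized_width = 2, 364, 476  # already multiples of patch_size * merge_size

grid_t = num_frames // temporal_patch_size  # 1
grid_h = resized_height // patch_size       # 26
grid_w = resized_width // patch_size        # 34

num_patches = grid_t * grid_h * grid_w                       # 884 flattened patches
patch_dim = channels * temporal_patch_size * patch_size**2   # 1176 values per patch (assumed layout)
llm_tokens = num_patches // merge_size**2                    # 221 tokens seen by the LLM
print(num_patches, patch_dim, llm_tokens)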
- + Handles: - Tokenization of text with special tokens for visual content - Image and video preprocessing - Generation of 3D positional embeddings - Conversion of chat messages to model inputs - + Attributes: tokenizer: Text tokenizer instance image_processor: Image/video preprocessor @@ -56,11 +57,11 @@ def __init__( ) -> None: """ Initialize the data processor. - + Args: model_path: Path to pretrained model video_min_frames: Minimum frames to sample from videos - video_max_frames: Maximum frames to sample from videos + video_max_frames: Maximum frames to sample from videos tokens_per_second: Temporal resolution for positional embeddings **kwargs: Additional configuration """ @@ -98,12 +99,12 @@ def __init__( def text2ids(self, text, images=None, videos=None): """ Convert text with image/video placeholders into model inputs. - + Args: text: Input text with <|image@placeholder|> and <|video@placeholder|> markers images: List of PIL Images corresponding to image placeholders videos: List of video data corresponding to video placeholders - + Returns: Dict containing: - input_ids: Token IDs @@ -129,10 +130,10 @@ def text2ids(self, text, images=None, videos=None): # Define placeholders and their lengths IMAGE_PLACEHOLDER = "<|image@placeholder|>" - VIDEO_PLACEHOLDER = "<|video@placeholder|>" + VIDEO_PLACEHOLDER = "<|video@placeholder|>" IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER) VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER) - + # Initialize tracking variables for text parsing st, image_idx, video_idx = 0, 0, 0 # Start position, image counter, video counter while st < len(text): @@ -169,13 +170,13 @@ def request2ids( ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]: """ Convert chat request with multimodal messages into model inputs. - + Args: request: Dictionary containing: - messages: List of chat messages with text/image/video content - request_id: Unique identifier for logging tgts: Optional target sequences - + Returns: Dict with same structure as text2ids() output """ @@ -196,16 +197,16 @@ def request2ids( # Parse and validate chat messages messages = parse_chat_messages(request.get("messages")) image_message_list = [] # Store visual content messages - + for msg in messages: role = msg.get("role") assert role in self.role_prefixes, f"Unsupported role: {role}" - + # Normalize content to list format content_items = msg.get("content") if not isinstance(content_items, list): content_items = [content_items] - + # Collect all visual content items for item in content_items: if isinstance(item, dict) and item.get("type") in ["image", "video"]: @@ -219,7 +220,7 @@ def request2ids( vision_start_index = 0 vision_message_index = 0 for i in range(len(prompt_token_ids)): - if prompt_token_ids[i] == self.vision_start_id : + if prompt_token_ids[i] == self.vision_start_id: self._add_text(prompt_token_ids[vision_start_index : i + 1], outputs) vision_start_index = i + 1 @@ -249,11 +250,11 @@ def request2ids( def _add_text(self, tokens, outputs: Dict) -> None: """ Add text tokens to model inputs dictionary. - + Args: tokens: Text string or already tokenized IDs outputs: Dictionary accumulating model inputs - + Note: - Handles both raw text and pre-tokenized inputs - Updates position IDs for 3D embeddings @@ -271,11 +272,11 @@ def _add_text(self, tokens, outputs: Dict) -> None: def _compute_text_positions(self, start_pos: int, num_tokens: int) -> np.ndarray: """ Generate 3D positional embeddings for text tokens. 
- + Args: start_pos: Starting position index num_tokens: Number of tokens to generate positions for - + Returns: numpy.ndarray: 3D position IDs shaped (3, num_tokens) """ @@ -287,11 +288,11 @@ def _compute_text_positions(self, start_pos: int, num_tokens: int) -> np.ndarray def _add_image(self, img, outputs: Dict) -> None: """ Add image data to model inputs dictionary. - + Args: img: PIL Image to process outputs: Dictionary accumulating model inputs - + Note: - Preprocesses image and calculates spatial dimensions - Adds image token IDs and type markers @@ -309,7 +310,7 @@ def _add_image(self, img, outputs: Dict) -> None: outputs["image_type_ids"].append(0) t, h, w = grid_thw - position_ids = self._compute_vision_positions(outputs["cur_position"], t,h,w, 0) + position_ids = self._compute_vision_positions(outputs["cur_position"], t, h, w, 0) outputs["position_ids"].append(position_ids) outputs["cur_position"] = position_ids.max() + 1 @@ -317,12 +318,12 @@ def _add_image(self, img, outputs: Dict) -> None: def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: """ Add video data to model inputs dictionary. - + Args: frames: Video frames as numpy array meta: Video metadata containing fps/duration outputs: Dictionary accumulating model inputs - + Note: - Handles temporal dimension in position embeddings - Uses video-specific token IDs and type markers @@ -342,22 +343,24 @@ def _add_video(self, frames, meta: Dict, outputs: Dict) -> None: fps = meta["fps"] second_per_grid_t = self.temporal_conv_size / fps t, h, w = grid_thw - position_ids = self._compute_vision_positions(outputs["cur_position"], t,h,w, second_per_grid_t) + position_ids = self._compute_vision_positions(outputs["cur_position"], t, h, w, second_per_grid_t) outputs["position_ids"].append(position_ids) outputs["cur_position"] = position_ids.max() + 1 - def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, second_per_grid_t:float) -> np.ndarray: + def _compute_vision_positions( + self, start_pos: int, t: int, h: int, w: int, second_per_grid_t: float + ) -> np.ndarray: """ Generate 3D positional embeddings for visual content. - + Args: start_pos: Starting position index t: Temporal dimension (frames) h: Height in patches - w: Width in patches + w: Width in patches second_per_grid_t: Seconds per temporal grid - + Returns: numpy.ndarray: 3D position IDs shaped (3, t*h*w) """ @@ -381,11 +384,11 @@ def _compute_vision_positions(self, start_pos: int, t: int, h: int, w: int, seco def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: """ Load and preprocess video into frames. 
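The text branch of the 3D positions is fully visible above (one index broadcast to all three axes); the vision branch is only partially visible in this diff, so the second function below is a hedged sketch of how (t, h, w) indices are commonly laid out, with the temporal index scaled by second_per_grid_t * tokens_per_second as the attributes suggest. text_positions mirrors the code shown; vision_positions_sketch and its exact layout are assumptions.

import numpy as np


def text_positions(start_pos: int, num_tokens: int) -> np.ndarray:
    # Mirrors _compute_text_positions: same index on all three axes.
    idx = np.arange(num_tokens).reshape(1, -1)
    return np.broadcast_to(idx, (3, num_tokens)) + start_pos


def vision_positions_sketch(start_pos, t, h, w, second_per_grid_t, tokens_per_second=2, spatial_conv_size=2):
    # Sketch only: grid reduced by the spatial merge size, temporal axis scaled
    # to "token time", h/w enumerating the reduced grid.
    h //= spatial_conv_size
    w //= spatial_conv_size
    tn = np.repeat(np.arange(t) * second_per_grid_t * tokens_per_second, h * w)
    hn = np.tile(np.repeat(np.arange(h), w), t)
    wn = np.tile(np.arange(w), t * h)
    return np.stack([tn, hn, wn]).astype(np.int64) + start_pos


print(text_positions(0, 4))
print(vision_positions_sketch(start_pos=4, t=1, h=4, w=4, second_per_grid_t=0.0))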
- + Args: url: Video file path or bytes item: Dictionary containing processing parameters - + Returns: tuple: (frames, metadata) where: - frames: Processed video frames as numpy array @@ -403,12 +406,12 @@ def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: # Apply frame sampling if fps or target_frames specified fps = item.get("fps", None) num_frames = item.get("target_frames", None) - + if fps is not None or num_frames is not None: # Get frame sampling constraints min_frames = item.get("min_frames", self.min_frames) max_frames = item.get("max_frames", self.max_frames) - + # Sample frames according to specifications frames = sample_frames( video=frames, @@ -417,9 +420,9 @@ def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: max_frames=max_frames, metadata=meta, fps=fps, - num_frames=num_frames + num_frames=num_frames, ) - + # Update metadata with new frame count and fps meta["num_of_frame"] = frames.shape[0] if fps is not None: @@ -432,13 +435,13 @@ def _load_and_process_video(self, url: str, item: Dict) -> np.ndarray: def apply_chat_template(self, request): """ Apply chat template to convert messages into token sequence. - + Args: request: Dictionary containing chat messages - + Returns: List of token IDs - + Raises: ValueError: If model doesn't support chat templates """ diff --git a/fastdeploy/input/qwen_mm_processor/process_video.py b/fastdeploy/input/qwen_mm_processor/process_video.py index 1e610eaff3..ae86330e75 100644 --- a/fastdeploy/input/qwen_mm_processor/process_video.py +++ b/fastdeploy/input/qwen_mm_processor/process_video.py @@ -15,12 +15,13 @@ """ import io +import math import os -import decord from tempfile import NamedTemporaryFile as ntf -from typing import Union, Optional +from typing import Optional, Union + +import decord import numpy as np -import math try: # moviepy 1.0 @@ -29,16 +30,14 @@ # moviepy 2.0 import moviepy as mp -from fastdeploy.utils import data_processor_logger - def is_gif(data: bytes) -> bool: """ Check if given bytes data is a GIF file by examining magic number. - + Args: data: Binary data to check - + Returns: bool: True if data is a GIF file (GIF87a or GIF89a format) """ @@ -48,10 +47,10 @@ def is_gif(data: bytes) -> bool: class VideoReaderWrapper(decord.VideoReader): """ Wrapper around decord.VideoReader to handle GIF files and fix memory leaks. - + This wrapper converts GIF inputs to MP4 format to work around decord's limitations, and implements proper cleanup to prevent memory leaks (https://github.com/dmlc/decord/issues/208). - + Attributes: original_file (str): Path to the original video file (for cleanup) """ @@ -59,7 +58,7 @@ class VideoReaderWrapper(decord.VideoReader): def __init__(self, video_path, *args, **kwargs): """ Initialize the video reader wrapper. - + Args: video_path: Can be one of: - str: Path to video file @@ -67,7 +66,7 @@ def __init__(self, video_path, *args, **kwargs): - io.BytesIO: Video data stream *args: Additional arguments for decord.VideoReader **kwargs: Additional keyword arguments for decord.VideoReader - + Note: Automatically converts GIF files to MP4 format for compatibility. """ @@ -105,13 +104,13 @@ def __init__(self, video_path, *args, **kwargs): def __getitem__(self, key): """ Get video frames by index/slice and reset reader position. 
- + Args: key: Index or slice of frames to retrieve - + Returns: decord.ndarray.NDArray: Requested video frames - + Note: Resets read position to start after frame retrieval """ @@ -122,7 +121,7 @@ def __getitem__(self, key): def __del__(self): """ Clean up temporary files when object is destroyed. - + Note: Removes any temporary MP4 files created from GIF conversions """ @@ -133,14 +132,14 @@ def __del__(self): def read_video_decord(video_path): """ Read video file using decord video reader and get metadata. - + Args: video_path: Can be one of: - str: Path to video file - bytes: Raw video bytes - io.BytesIO: Video data stream - VideoReaderWrapper: Existing video reader instance - + Returns: tuple: (video_reader, video_meta) where: - video_reader: VideoReaderWrapper instance @@ -163,9 +162,9 @@ def read_video_decord(video_path): # Package metadata video_meta = { - "fps": fps, # Frames per second + "fps": fps, # Frames per second "duration": duration, # Total duration in seconds - "num_of_frame": vlen # Total frame count + "num_of_frame": vlen, # Total frame count } return video_reader, video_meta @@ -181,7 +180,7 @@ def sample_frames( ): """ Sample frames from video according to specified criteria. - + Args: video: Input video frames as numpy array frame_factor: Ensure sampled frames are multiples of this factor @@ -190,12 +189,12 @@ def sample_frames( metadata: Video metadata containing fps information fps: Target frames per second for sampling num_frames: Exact number of frames to sample - + Returns: np.ndarray: Sampled video frames - + Raises: - ValueError: If both fps and num_frames are specified, + ValueError: If both fps and num_frames are specified, or if required metadata is missing, or if requested frames exceed available frames """ @@ -234,7 +233,7 @@ def sample_frames( else: # Keep all frames if no sampling requested indices = np.arange(0, total_num_frames).astype(np.int32) - + # Apply frame selection video = video[indices] diff --git a/fastdeploy/input/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor.py index 39a5ffd8f7..fe994d148e 100644 --- a/fastdeploy/input/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor.py @@ -26,12 +26,12 @@ class QwenVLProcessor(ErnieProcessor): """ Processor for Qwen Vision-Language models that handles multimodal inputs. - + Inherits from ErnieProcessor and extends functionality for: - Image and video processing - Multimodal request handling - Generation configuration - + Attributes: ernie_processor: Underlying DataProcessor instance tokenizer: Text tokenizer @@ -50,7 +50,7 @@ def __init__( ): """ Initialize QwenVLProcessor. - + Args: config: Model configuration model_name_or_path: Path to pretrained model @@ -91,7 +91,7 @@ def __init__( def get_pad_id(self): """ Get the padding token ID. - + Returns: int: Padding token ID """ @@ -100,7 +100,7 @@ def get_pad_id(self): def _load_tokenizer(self): """ Load and initialize the tokenizer. - + Returns: AutoTokenizer: Initialized tokenizer instance """ @@ -130,12 +130,12 @@ def set_value(req, key, value): def process_request(self, request, max_model_len=None, **kwargs): """ Process incoming request into model inputs. - + Args: request: Input request object max_model_len: Maximum model context length **kwargs: Additional processing arguments - + Returns: Request: Processed request with model inputs """ @@ -149,13 +149,13 @@ def process_request(self, request, max_model_len=None, **kwargs): def _parse_processor_kwargs(self, kwargs): """ Parse and validate multimodal processor kwargs. 
- + Args: kwargs: Input kwargs dictionary - + Returns: dict: Validated processor kwargs - + Raises: ValueError: If kwargs format is invalid """ @@ -188,13 +188,13 @@ def _parse_processor_kwargs(self, kwargs): def _parse_limits(self, limits): """ Parse and validate multimodal input limits. - + Args: limits: Input limits dictionary - + Returns: dict: Validated limits with defaults - + Raises: ValueError: If limits format is invalid """ @@ -215,10 +215,10 @@ def _parse_limits(self, limits): def _check_mm_limits(self, item): """ Validate multimodal inputs against configured limits. - + Args: item: Input request item to check - + Raises: ValueError: If input exceeds configured limits """ @@ -246,14 +246,14 @@ def _check_mm_limits(self, item): def process_request_dict(self, request, max_model_len=None): """ Process request dictionary into model inputs. - + Args: request: Input request dictionary max_model_len: Maximum model context length - + Returns: dict: Processed request with model inputs - + Raises: ValueError: If request format is invalid """ @@ -294,8 +294,10 @@ def process_request_dict(self, request, max_model_len=None): # Handle prompt truncation if exceeds model context length if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len: - request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1] # Leave space for at least 1 new token - + request["prompt_token_ids"] = request["prompt_token_ids"][ + : max_model_len - 1 + ] # Leave space for at least 1 new token + # Set default max_tokens if not specified if request.get("max_tokens") is None: request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) # Ensure at least 1 token @@ -306,7 +308,7 @@ def process_request_dict(self, request, max_model_len=None): def append_generated_tokens(self, multimodal_inputs, generated_token_ids): """ Append previously generated tokens to inputs. - + Args: multimodal_inputs: Current model inputs generated_token_ids: Tokens to append @@ -324,10 +326,10 @@ def append_generated_tokens(self, multimodal_inputs, generated_token_ids): def pack_outputs(self, outs): """ Convert and package model outputs into standardized format. - + Args: outs: Raw model outputs - + Returns: dict: Packaged outputs with proper types and shapes """ @@ -353,12 +355,12 @@ def pack_outputs(self, outs): def process_response_dict(self, response_dict, stream, **kwargs): """ Process model response into final output format. - + Args: response_dict: Raw model response stream: Whether response is streaming **kwargs: Additional processing arguments - + Returns: dict: Processed response """ @@ -373,10 +375,10 @@ def process_response_dict(self, response_dict, stream, **kwargs): def update_stop_seq(self, stop_sequences): """ Update stop sequences for generation. 
- + Args: stop_sequences: Stop sequences to process - + Returns: tuple: (stop_seqs, stop_seqs_len) processed sequences """ From 3c78d5e45352355883327d34212187707c58b4da Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Tue, 19 Aug 2025 16:06:48 +0800 Subject: [PATCH 12/16] qwen_vl_processor --- fastdeploy/input/qwen_mm_processor/process.py | 36 +++- fastdeploy/input/qwen_vl_processor.py | 183 +++--------------- 2 files changed, 62 insertions(+), 157 deletions(-) diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py index ee89e4e137..8ee319b1ae 100644 --- a/fastdeploy/input/qwen_mm_processor/process.py +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -53,6 +53,7 @@ def __init__( video_min_frames: int = 4, video_max_frames: int = 768, tokens_per_second: int = 2, + tokenizer=None, **kwargs, ) -> None: """ @@ -69,8 +70,11 @@ def __init__( self.max_frames = video_max_frames # Initialize tokenizer with left padding and fast tokenizer - self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) - self.tokenizer.ignored_index = -100 # Set ignored index for loss calculation + if tokenizer is None: + self.tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left", use_fast=True) + self.tokenizer.ignored_index = -100 # Set ignored index for loss calculation + else: + self.tokenizer = tokenizer self.image_processor = ImageProcessor.from_pretrained(model_path) # Initialize image processor # Convolution sizes for patch aggregation @@ -96,6 +100,25 @@ def __init__( "assistant": "Assistant: ", } + def _pack_outputs(self, outputs): + # Process visual outputs - stack if exists or set to None if empty + if not outputs["images"]: + outputs["images"] = None # No images case + outputs["grid_thw"] = None # No spatial dimensions + outputs["image_type_ids"] = None # No type IDs + else: + outputs["images"] = np.vstack(outputs["images"]) # Stack image features vertically + outputs["grid_thw"] = np.vstack(outputs["grid_thw"]) # Stack spatial dimensions + outputs["image_type_ids"] = np.array(outputs["image_type_ids"]) # Convert to numpy array + + # Convert all outputs to numpy arrays with appropriate types + outputs["input_ids"] = np.array(outputs["input_ids"], dtype=np.int64) # Token IDs as int64 + outputs["token_type_ids"] = np.array(outputs["token_type_ids"], dtype=np.int64) # Type IDs as int64 + outputs["position_ids"] = np.concatenate( + outputs["position_ids"], axis=1, dtype=np.int64 + ) # Concatenate position IDs + return outputs + def text2ids(self, text, images=None, videos=None): """ Convert text with image/video placeholders into model inputs. 
@@ -163,7 +186,7 @@ def text2ids(self, text, images=None, videos=None): video_idx += 1 st = ed + VIDEO_PLACEHOLDER_LEN - return outputs + return self._pack_outputs(outputs) def request2ids( self, request: Dict[str, Any], tgts: List[str] = None @@ -245,7 +268,7 @@ def request2ids( vision_message_index += 1 self._add_text(prompt_token_ids[vision_start_index:], outputs) - return outputs + return self._pack_outputs(outputs) def _add_text(self, tokens, outputs: Dict) -> None: """ @@ -262,10 +285,11 @@ def _add_text(self, tokens, outputs: Dict) -> None: if isinstance(tokens, str): tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] + num_tokens = len(tokens) outputs["input_ids"].extend(tokens) - outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) + outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * num_tokens) - position_ids = self._compute_text_positions(outputs["cur_position"], len(tokens)) + position_ids = self._compute_text_positions(outputs["cur_position"], num_tokens) outputs["position_ids"].append(position_ids) outputs["cur_position"] = position_ids.max() + 1 diff --git a/fastdeploy/input/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor.py index fe994d148e..1a88eb7287 100644 --- a/fastdeploy/input/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor.py @@ -15,15 +15,14 @@ """ import numpy as np -from paddleformers.generation import GenerationConfig from fastdeploy.engine.request import Request -from fastdeploy.input.ernie_processor import ErnieProcessor -from fastdeploy.input.qwen_mm_processor import IDS_TYPE_FLAG, DataProcessor +from fastdeploy.input.qwen_mm_processor import DataProcessor +from fastdeploy.input.text_processor import DataProcessor as TextProcessor from fastdeploy.utils import data_processor_logger -class QwenVLProcessor(ErnieProcessor): +class QwenVLProcessor(TextProcessor): """ Processor for Qwen Vision-Language models that handles multimodal inputs. @@ -33,7 +32,7 @@ class QwenVLProcessor(ErnieProcessor): - Generation configuration Attributes: - ernie_processor: Underlying DataProcessor instance + processor: Underlying DataProcessor instance tokenizer: Text tokenizer generation_config: Model generation configuration eos_token_ids: End-of-sequence token IDs @@ -47,6 +46,7 @@ def __init__( limit_mm_per_prompt=None, mm_processor_kwargs=None, reasoning_parser_obj=None, + tool_parser_obj=None, ): """ Initialize QwenVLProcessor. 
@@ -58,74 +58,18 @@ def __init__( mm_processor_kwargs: Additional kwargs for multimodal processor reasoning_parser_obj: Optional reasoning parser """ + super().__init__(model_name_or_path, reasoning_parser_obj, tool_parser_obj) + data_processor_logger.info(f"model_name_or_path: {model_name_or_path}") processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs) - - self.ernie_processor = DataProcessor( + self.processor = DataProcessor( model_path=model_name_or_path, tokens_per_second=config.vision_config.tokens_per_second, + tokenizer=self.tokenizer, **processor_kwargs, ) - self._load_tokenizer() - self.decode_status = dict() - - # Load generation config if available - try: - self.generation_config = GenerationConfig.from_pretrained(model_name_or_path) - except Exception as e: - data_processor_logger.warning( - f"Can't find generation config: {e}, so it will not use generation_config field in the model config" - ) - self.generation_config = None # Fallback to None if config not found - - from paddleformers.trl.llm_utils import get_eos_token_id - self.eos_token_ids = get_eos_token_id(self.tokenizer, self.generation_config) - self.eos_token_id_len = len(self.eos_token_ids) - self.pad_token_id = self.get_pad_id() self.limit_mm_per_prompt = self._parse_limits(limit_mm_per_prompt) - self.reasoning_parser = None - if reasoning_parser_obj: - self.reasoning_parser = reasoning_parser_obj(self.tokenizer) - - def get_pad_id(self): - """ - Get the padding token ID. - - Returns: - int: Padding token ID - """ - return self.tokenizer.pad_token_id - - def _load_tokenizer(self): - """ - Load and initialize the tokenizer. - - Returns: - AutoTokenizer: Initialized tokenizer instance - """ - self.tokenizer = self.ernie_processor.tokenizer - - def _apply_default_parameters(self, request): - """ - Apply default value for parameters in request - """ - - def set_value(req, key, value): - value = getattr(self.generation_config, key, value) - if isinstance(req, dict): - if key not in req: - req[key] = value - else: - if req.get(key) is None: - req.set(key, value) - - set_value(request, "top_p", 0.7) - set_value(request, "temperature", 1.0) - set_value(request, "repetition_penalty", 1.0) - set_value(request, "frequency_penalty", 0.0) - set_value(request, "presence_penalty", 0.0) - return request def process_request(self, request, max_model_len=None, **kwargs): """ @@ -275,11 +219,13 @@ def process_request_dict(self, request, max_model_len=None): self._check_mm_limits(multimodal_data) images = multimodal_data.get("image", None) videos = multimodal_data.get("video", None) - outputs = self.ernie_processor.text2ids(request["prompt"], images, videos) + outputs = self.processor.text2ids(request["prompt"], images, videos) + elif request.get("messages"): messages = request["messages"] self._check_mm_limits(messages) - outputs = self.ernie_processor.request2ids(request) + outputs = self.processor.request2ids(request) + else: raise ValueError(f"Request must contain 'prompt', or 'messages': {request}") @@ -288,6 +234,7 @@ def process_request_dict(self, request, max_model_len=None): if metadata and metadata.get("generated_token_ids"): self.append_generated_tokens(outputs, metadata["generated_token_ids"]) outputs = self.pack_outputs(outputs) + request["prompt_token_ids"] = outputs["input_ids"].tolist() request["prompt_token_ids_len"] = len(request["prompt_token_ids"]) request["multimodal_inputs"] = outputs @@ -305,89 +252,23 @@ def process_request_dict(self, request, max_model_len=None): return request - def 
append_generated_tokens(self, multimodal_inputs, generated_token_ids): - """ - Append previously generated tokens to inputs. - - Args: - multimodal_inputs: Current model inputs - generated_token_ids: Tokens to append - """ - - num_tokens = len(generated_token_ids) - multimodal_inputs["input_ids"].extend(generated_token_ids) - multimodal_inputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * num_tokens) + def append_generated_tokens(self, outputs, generated_token_ids): + out = {"input_ids": [], "token_type_ids": [], "position_ids": [], "cur_position": outputs["cur_position"]} + self.processor._add_text(generated_token_ids, out) - start = multimodal_inputs["cur_position"] - for i in range(num_tokens): - multimodal_inputs["position_ids"].append([start + i] * 3) - multimodal_inputs["cur_position"] += num_tokens - - def pack_outputs(self, outs): - """ - Convert and package model outputs into standardized format. - - Args: - outs: Raw model outputs - - Returns: - dict: Packaged outputs with proper types and shapes - """ - # Process visual outputs - stack if exists or set to None if empty - if not outs["images"]: - outs["images"] = None # No images case - outs["grid_thw"] = None # No spatial dimensions - outs["image_type_ids"] = None # No type IDs - else: - outs["images"] = np.vstack(outs["images"]) # Stack image features vertically - outs["grid_thw"] = np.vstack(outs["grid_thw"]) # Stack spatial dimensions - outs["image_type_ids"] = np.array(outs["image_type_ids"]) # Convert to numpy array - - outs["image_patch_id"] = self.ernie_processor.image_token_id - outs["video_patch_id"] = self.ernie_processor.video_token_id - - # Convert all outputs to numpy arrays with appropriate types - outs["input_ids"] = np.array(outs["input_ids"], dtype=np.int64) # Token IDs as int64 - outs["token_type_ids"] = np.array(outs["token_type_ids"], dtype=np.int64) # Type IDs as int64 - outs["position_ids"] = np.concatenate(outs["position_ids"], axis=1, dtype=np.int64) # Concatenate position IDs - return outs - - def process_response_dict(self, response_dict, stream, **kwargs): - """ - Process model response into final output format. - - Args: - response_dict: Raw model response - stream: Whether response is streaming - **kwargs: Additional processing arguments - - Returns: - dict: Processed response - """ - enable_thinking = kwargs.pop("enable_thinking", True) - if enable_thinking is None: - enable_thinking = True - if stream: - return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) - else: - return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) - - def update_stop_seq(self, stop_sequences): - """ - Update stop sequences for generation. 
- - Args: - stop_sequences: Stop sequences to process + outputs["input_ids"] = np.concatenate( + [outputs["input_ids"], np.array(out["input_ids"], dtype=np.int64)], axis=0 + ) + outputs["token_type_ids"] = np.concatenate( + [outputs["token_type_ids"], np.array(out["token_type_ids"], dtype=np.int64)], axis=0 + ) + outputs["position_ids"] = np.concatenate( + [outputs["position_ids"], out["position_ids"]], axis=1, dtype=np.int64 + ) + outputs["cur_position"] = out["cur_position"] - Returns: - tuple: (stop_seqs, stop_seqs_len) processed sequences - """ - stop_seqs = [] - if isinstance(stop_sequences, str): - stop_sequences = [stop_sequences] - for seq in stop_sequences: - if seq != self.tokenizer.eos_token_id: - stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) - stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) - data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") - return stop_seqs, stop_seqs_len + def pack_outputs(self, outputs): + outputs["image_patch_id"] = self.processor.image_token_id + outputs["video_patch_id"] = self.processor.video_token_id + outputs["position_ids"] = outputs["position_ids"].transpose(1, 0) + return outputs From 6a69e04dbcabc0b0a2b1f69ceddf693041c6b886 Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Tue, 19 Aug 2025 20:57:18 +0800 Subject: [PATCH 13/16] add qwen_vl_processor unittest --- fastdeploy/input/qwen_mm_processor/process.py | 46 ++++- fastdeploy/input/qwen_vl_processor.py | 74 ++++---- test/input/test_qwen_vl_processor.py | 159 ++++++++++++++++++ 3 files changed, 244 insertions(+), 35 deletions(-) create mode 100644 test/input/test_qwen_vl_processor.py diff --git a/fastdeploy/input/qwen_mm_processor/process.py b/fastdeploy/input/qwen_mm_processor/process.py index 8ee319b1ae..68b6ba5e6a 100644 --- a/fastdeploy/input/qwen_mm_processor/process.py +++ b/fastdeploy/input/qwen_mm_processor/process.py @@ -101,6 +101,21 @@ def __init__( } def _pack_outputs(self, outputs): + """ + Pack and convert all output data into numpy arrays with appropriate types. + + Args: + outputs (dict): Dictionary containing model outputs with keys: + - images: List of visual features + - grid_thw: List of spatial dimensions + - image_type_ids: List of content type indicators + - input_ids: List of token IDs + - token_type_ids: List of type identifiers + - position_ids: List of position embeddings + + Returns: + dict: Processed outputs with all values converted to numpy arrays + """ # Process visual outputs - stack if exists or set to None if empty if not outputs["images"]: outputs["images"] = None # No images case @@ -188,6 +203,21 @@ def text2ids(self, text, images=None, videos=None): return self._pack_outputs(outputs) + def _parse_chat_messages(self, request): + """ + Parse chat messages from request into structured format. 
+ + Args: + request (dict): Input request containing chat messages + + Returns: + list: Parsed list of message dictionaries with: + - role (str): Message role (user/assistant) + - content (str): Message text content + - images (list, optional): List of image data if present + """ + return parse_chat_messages(request.get("messages")) + def request2ids( self, request: Dict[str, Any], tgts: List[str] = None ) -> Dict[str, Union[np.ndarray, List[np.ndarray], None]]: @@ -218,7 +248,7 @@ def request2ids( } # Parse and validate chat messages - messages = parse_chat_messages(request.get("messages")) + messages = self._parse_chat_messages(request) image_message_list = [] # Store visual content messages for msg in messages: @@ -234,11 +264,14 @@ def request2ids( for item in content_items: if isinstance(item, dict) and item.get("type") in ["image", "video"]: image_message_list.append(item) + + raw_messages = request["messages"] request["messages"] = messages prompt_token_ids = self.apply_chat_template(request) if len(prompt_token_ids) == 0: raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs") + request["messages"] = raw_messages vision_start_index = 0 vision_message_index = 0 @@ -376,17 +409,17 @@ def _compute_vision_positions( self, start_pos: int, t: int, h: int, w: int, second_per_grid_t: float ) -> np.ndarray: """ - Generate 3D positional embeddings for visual content. + Generate 3D position IDs for visual inputs. Args: - start_pos: Starting position index - t: Temporal dimension (frames) + start_pos: Base position in sequence + t: Temporal patches (1 for images) h: Height in patches w: Width in patches - second_per_grid_t: Seconds per temporal grid + second_per_grid_t: Time per temporal patch Returns: - numpy.ndarray: 3D position IDs shaped (3, t*h*w) + np.ndarray: Position IDs for [t,h,w] dimensions """ h //= self.spatial_conv_size w //= self.spatial_conv_size @@ -478,6 +511,7 @@ def apply_chat_template(self, request): add_generation_prompt=request.get("add_generation_prompt", True), ) prompt_token_str = raw_prompt.replace(self.image_token, "").replace(self.video_token, "") + request["text_after_process"] = raw_prompt tokens = self.tokenizer.tokenize(prompt_token_str) token_ids = self.tokenizer.convert_tokens_to_ids(tokens) diff --git a/fastdeploy/input/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor.py index 1a88eb7287..d9232e464b 100644 --- a/fastdeploy/input/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor.py @@ -24,19 +24,18 @@ class QwenVLProcessor(TextProcessor): """ - Processor for Qwen Vision-Language models that handles multimodal inputs. + Qwen Vision-Language processor for handling multimodal inputs. 
- Inherits from ErnieProcessor and extends functionality for: + This processor extends TextProcessor to support: - Image and video processing - - Multimodal request handling - - Generation configuration + - Multimodal feature extraction + - Tokenization and position encoding + - Request processing and model input generation Attributes: - processor: Underlying DataProcessor instance - tokenizer: Text tokenizer - generation_config: Model generation configuration - eos_token_ids: End-of-sequence token IDs - limit_mm_per_prompt: Limits for multimodal inputs + processor (DataProcessor): Underlying data processor instance + tokenizer: Text tokenizer instance + limit_mm_per_prompt (dict): Limits for multimodal inputs per prompt """ def __init__( @@ -49,14 +48,15 @@ def __init__( tool_parser_obj=None, ): """ - Initialize QwenVLProcessor. + Initialize QwenVLProcessor instance. Args: - config: Model configuration - model_name_or_path: Path to pretrained model - limit_mm_per_prompt: Limits for multimodal inputs per prompt - mm_processor_kwargs: Additional kwargs for multimodal processor - reasoning_parser_obj: Optional reasoning parser + config: Model configuration object + model_name_or_path (str): Pretrained model name or path + limit_mm_per_prompt (dict, optional): Limits for multimodal inputs + mm_processor_kwargs (dict, optional): Multimodal processor arguments + reasoning_parser_obj: Reasoning parser instance + tool_parser_obj: Tool parser instance """ super().__init__(model_name_or_path, reasoning_parser_obj, tool_parser_obj) @@ -73,12 +73,12 @@ def __init__( def process_request(self, request, max_model_len=None, **kwargs): """ - Process incoming request into model inputs. + Process incoming request and generate model inputs. Args: request: Input request object - max_model_len: Maximum model context length - **kwargs: Additional processing arguments + max_model_len (int, optional): Maximum context length + **kwargs: Additional processing parameters Returns: Request: Processed request with model inputs @@ -92,16 +92,16 @@ def process_request(self, request, max_model_len=None, **kwargs): def _parse_processor_kwargs(self, kwargs): """ - Parse and validate multimodal processor kwargs. + Parse and validate multimodal processor arguments. Args: - kwargs: Input kwargs dictionary + kwargs (dict): Processor configuration arguments Returns: - dict: Validated processor kwargs + dict: Validated processor arguments Raises: - ValueError: If kwargs format is invalid + ValueError: If arguments format is invalid """ if not kwargs: return {} @@ -134,7 +134,7 @@ def _parse_limits(self, limits): Parse and validate multimodal input limits. Args: - limits: Input limits dictionary + limits (dict): Input limits configuration Returns: dict: Validated limits with defaults @@ -161,7 +161,7 @@ def _check_mm_limits(self, item): Validate multimodal inputs against configured limits. 
Args: - item: Input request item to check + item: Input request item to validate Raises: ValueError: If input exceeds configured limits @@ -176,9 +176,9 @@ def _check_mm_limits(self, item): for message in item: if isinstance(message.get("content"), list): for part in message["content"]: - if part.get("type") == "image": + if part.get("type") in ["image_url", "image"]: mm_data["image"].append(part) - elif part.get("type") == "video": + elif part.get("type") in ["video_url", "video"]: mm_data["video"].append(part) for modality, data in mm_data.items(): @@ -192,8 +192,8 @@ def process_request_dict(self, request, max_model_len=None): Process request dictionary into model inputs. Args: - request: Input request dictionary - max_model_len: Maximum model context length + request (dict): Input request dictionary + max_model_len (int, optional): Maximum context length Returns: dict: Processed request with model inputs @@ -253,6 +253,13 @@ def process_request_dict(self, request, max_model_len=None): return request def append_generated_tokens(self, outputs, generated_token_ids): + """ + Append generated tokens to existing outputs. + + Args: + outputs: Current model outputs + generated_token_ids: Generated tokens to append + """ out = {"input_ids": [], "token_type_ids": [], "position_ids": [], "cur_position": outputs["cur_position"]} self.processor._add_text(generated_token_ids, out) @@ -263,11 +270,20 @@ def append_generated_tokens(self, outputs, generated_token_ids): [outputs["token_type_ids"], np.array(out["token_type_ids"], dtype=np.int64)], axis=0 ) outputs["position_ids"] = np.concatenate( - [outputs["position_ids"], out["position_ids"]], axis=1, dtype=np.int64 + [outputs["position_ids"], out["position_ids"][0]], axis=1, dtype=np.int64 ) outputs["cur_position"] = out["cur_position"] def pack_outputs(self, outputs): + """ + Prepare final output dictionary for model. 
+ + Args: + outputs: Intermediate processing outputs + + Returns: + dict: Packed output dictionary with all required fields + """ outputs["image_patch_id"] = self.processor.image_token_id outputs["video_patch_id"] = self.processor.video_token_id outputs["position_ids"] = outputs["position_ids"].transpose(1, 0) diff --git a/test/input/test_qwen_vl_processor.py b/test/input/test_qwen_vl_processor.py new file mode 100644 index 0000000000..6dc1344021 --- /dev/null +++ b/test/input/test_qwen_vl_processor.py @@ -0,0 +1,159 @@ +import unittest +from unittest.mock import MagicMock, patch + +import numpy as np +from PIL import Image + +from fastdeploy.engine.request import Request +from fastdeploy.input.qwen_mm_processor import DataProcessor +from fastdeploy.input.qwen_vl_processor import QwenVLProcessor + + +def mock_pil_image(height, width): + rgb_image = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) + return Image.fromarray(rgb_image) + + +def mock_parse_chat_messages(): + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "image_url": {}, + "image": mock_pil_image(480, 640), + }, + { + "type": "video", + "video_url": {}, + "video": b"123", + }, + {"type": "text", "text": "Describe image and video."}, + ], + } + ] + return messages + + +def mock_video_frames(num_frames, height, width): + frames = [] + for i in range(num_frames): + frame = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) + frames.append(frame) + return np.stack(frames, axis=0) + + +def mock_load_and_process_video(): + frames = mock_video_frames(num_frames=3, height=480, width=640) + meta = { + "fps": 1, + "duration": 3, + "num_of_frame": 3, + } + return frames, meta + + +class TestQwenVLProcessor(unittest.TestCase): + + def setUp(self): + config = MagicMock() + config.vision_config.tokens_per_second = 2 + + self.patcher_parse_chat_messages = patch.object( + DataProcessor, "_parse_chat_messages", return_value=mock_parse_chat_messages() + ) + self.patcher_parse_chat_messages.start() + + self.patcher_load_and_process_video = patch.object( + DataProcessor, "_load_and_process_video", return_value=mock_load_and_process_video() + ) + self.patcher_load_and_process_video.start() + + mm_processor_kwargs = { + "video_max_frames": 20, + "video_min_frames": 1, + } + limit_mm_per_prompt = {"image": 1, "video": 1, "audio": 1} + + model_name_or_path = "./data/models/paddle/Qwen2.5-VL-3B-Instruct" + self.processor = QwenVLProcessor( + config=config, + model_name_or_path=model_name_or_path, + limit_mm_per_prompt=limit_mm_per_prompt, + mm_processor_kwargs=mm_processor_kwargs, + reasoning_parser_obj=None, + tool_parser_obj=None, + ) + + def tearDown(self) -> None: + self.patcher_parse_chat_messages.stop() + self.patcher_load_and_process_video.stop() + + def test_process_request(self): + prompt = { + "request_id": "123", + "messages": [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": "file://demo.jpeg"}}, + {"type": "video_url", "video_url": {"url": "file://3_frame_video.mp4"}}, + {"type": "text", "text": "Describe image and video."}, + ], + } + ], + } + + request = Request.from_dict(prompt) + result = self.processor.process_request(request, 1024 * 100) + + self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["position_ids"].shape[0]) + self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["token_type_ids"].shape[0]) + self.assertEqual( + result.multimodal_inputs["images"].shape[0], + sum(map(lambda x: x.prod(), 
result.multimodal_inputs["grid_thw"])), + ) + self.assertEqual( + result.multimodal_inputs["image_type_ids"].shape[0], result.multimodal_inputs["grid_thw"][:, 0].sum() + ) + self.assertEqual(result.multimodal_inputs["pic_cnt"], 1) + self.assertEqual(result.multimodal_inputs["video_cnt"], 1) + + def test_process_request_dict(self): + num_generated_token_ids = 10 + request = { + "metadata": { + "generated_token_ids": [1] * num_generated_token_ids, + }, + "stop": ["stop", "eof"], + "request_id": "123", + "messages": [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": "file://demo.jpeg"}}, + {"type": "video_url", "video_url": {"url": "file://3_frame_video.mp4"}}, + {"type": "text", "text": "Describe image and video."}, + ], + } + ], + } + + result = self.processor.process_request_dict(request, 1024 * 100) + + self.assertEqual(result["prompt_token_ids_len"], result["multimodal_inputs"]["position_ids"].shape[0]) + self.assertEqual(result["prompt_token_ids_len"], result["multimodal_inputs"]["token_type_ids"].shape[0]) + self.assertEqual( + result["multimodal_inputs"]["images"].shape[0], + sum(map(lambda x: x.prod(), result["multimodal_inputs"]["grid_thw"])), + ) + self.assertEqual( + result["multimodal_inputs"]["image_type_ids"].shape[0], result["multimodal_inputs"]["grid_thw"][:, 0].sum() + ) + self.assertEqual(result["multimodal_inputs"]["pic_cnt"], 1) + self.assertEqual(result["multimodal_inputs"]["video_cnt"], 1) + + +if __name__ == "__main__": + unittest.main() From 4edd20f468fdc60eee68214f8a37508521e0e11e Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Tue, 19 Aug 2025 21:21:14 +0800 Subject: [PATCH 14/16] update model path --- test/input/test_qwen_vl_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/input/test_qwen_vl_processor.py b/test/input/test_qwen_vl_processor.py index 6dc1344021..7b983344f9 100644 --- a/test/input/test_qwen_vl_processor.py +++ b/test/input/test_qwen_vl_processor.py @@ -76,7 +76,7 @@ def setUp(self): } limit_mm_per_prompt = {"image": 1, "video": 1, "audio": 1} - model_name_or_path = "./data/models/paddle/Qwen2.5-VL-3B-Instruct" + model_name_or_path = "/workspace/Fastdeploy/test/ModelData/Qwen2.5-VL-7B-Instruct" self.processor = QwenVLProcessor( config=config, model_name_or_path=model_name_or_path, From 1a46dd1a575a2c6a1c0c2e1926bc3ab4fa48a0df Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Tue, 19 Aug 2025 21:42:03 +0800 Subject: [PATCH 15/16] update model path --- test/input/test_qwen_vl_processor.py | 79 +++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/test/input/test_qwen_vl_processor.py b/test/input/test_qwen_vl_processor.py index 7b983344f9..f5db6b9cba 100644 --- a/test/input/test_qwen_vl_processor.py +++ b/test/input/test_qwen_vl_processor.py @@ -1,3 +1,19 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" + import unittest from unittest.mock import MagicMock, patch @@ -10,11 +26,28 @@ def mock_pil_image(height, width): + """Generate mock random RGB image + + Args: + height: Image height in pixels + width: Image width in pixels + + Returns: + PIL.Image object with random RGB data + """ rgb_image = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) return Image.fromarray(rgb_image) def mock_parse_chat_messages(): + """Generate mock chat messages with image, video and text content + + Returns: + List of chat message dictionaries containing: + - Mock image data (480x640 pixels) + - Mock video data (dummy bytes) + - Sample text prompt + """ messages = [ { "role": "user", @@ -37,6 +70,17 @@ def mock_parse_chat_messages(): def mock_video_frames(num_frames, height, width): + """Generate mock video frames with random pixel data + + Args: + num_frames: Number of frames to generate + height: Frame height in pixels + width: Frame width in pixels + + Returns: + Numpy array of shape (num_frames, height, width, 3) + containing random RGB frames + """ frames = [] for i in range(num_frames): frame = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8) @@ -45,6 +89,16 @@ def mock_video_frames(num_frames, height, width): def mock_load_and_process_video(): + """Mock video loading and processing + + Returns: + Tuple containing: + - frames: 3 mock video frames (480x640 resolution) + - meta: Dictionary with mock video metadata: + * fps: 1 + * duration: 3 seconds + * num_of_frame: 3 + """ frames = mock_video_frames(num_frames=3, height=480, width=640) meta = { "fps": 1, @@ -55,8 +109,14 @@ def mock_load_and_process_video(): class TestQwenVLProcessor(unittest.TestCase): + """Unit tests for Qwen Vision-Language Processor functionality""" def setUp(self): + """Initialize test case with: + - Mock configuration + - Patched message parsing and video processing methods + - QwenVLProcessor instance with test parameters + """ config = MagicMock() config.vision_config.tokens_per_second = 2 @@ -76,7 +136,7 @@ def setUp(self): } limit_mm_per_prompt = {"image": 1, "video": 1, "audio": 1} - model_name_or_path = "/workspace/Fastdeploy/test/ModelData/Qwen2.5-VL-7B-Instruct" + model_name_or_path = "/ModelData/Qwen2.5-VL-7B-Instruct" self.processor = QwenVLProcessor( config=config, model_name_or_path=model_name_or_path, @@ -87,10 +147,19 @@ def setUp(self): ) def tearDown(self) -> None: + """Clean up test case by stopping all mock patches""" self.patcher_parse_chat_messages.stop() self.patcher_load_and_process_video.stop() def test_process_request(self): + """Test processing of Request object with multimodal input + + Validates: + 1. Token ID lengths match position_ids and token_type_ids shapes + 2. Image processing produces expected output dimensions + 3. Video processing produces expected output dimensions + 4. Correct counts for images (1) and videos (1) + """ prompt = { "request_id": "123", "messages": [ @@ -121,6 +190,14 @@ def test_process_request(self): self.assertEqual(result.multimodal_inputs["video_cnt"], 1) def test_process_request_dict(self): + """Test processing of dictionary-format request with multimodal input + + Validates: + 1. Token ID lengths match position_ids and token_type_ids shapes + 2. Image processing produces expected output dimensions + 3. Video processing produces expected output dimensions + 4. 
Correct counts for images (1) and videos (1) + """ num_generated_token_ids = 10 request = { "metadata": { From 74f3a08a8eeba6bf1078916b3a3911ec156201bc Mon Sep 17 00:00:00 2001 From: lddfym <1799858408@qq.com> Date: Thu, 21 Aug 2025 00:06:01 +0800 Subject: [PATCH 16/16] update qwen_vl_processor unittest --- test/input/test_qwen_vl_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/input/test_qwen_vl_processor.py b/test/input/test_qwen_vl_processor.py index f5db6b9cba..e941adfc77 100644 --- a/test/input/test_qwen_vl_processor.py +++ b/test/input/test_qwen_vl_processor.py @@ -131,7 +131,7 @@ def setUp(self): self.patcher_load_and_process_video.start() mm_processor_kwargs = { - "video_max_frames": 20, + "video_max_frames": 10, "video_min_frames": 1, } limit_mm_per_prompt = {"image": 1, "video": 1, "audio": 1}
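
A minimal NumPy sketch of the position-ID handling the patch relies on: text columns are appended as [start + i] repeated over three rows, vision blocks are concatenated along axis=1, and pack_outputs() finally transposes to (seq_len, 3). The helper names and the exact vision-axis layout below are illustrative assumptions, not part of the patch:

import numpy as np

def text_positions(start, num_tokens):
    # Text tokens advance all three axes together: column i == [start + i] * 3.
    pos = np.arange(start, start + num_tokens, dtype=np.int64)
    return np.stack([pos, pos, pos], axis=0)              # shape (3, num_tokens)

def vision_positions(start, t, h, w):
    # Hypothetical block of t*h*w merged patches (already divided by the
    # spatial merge size): row 0 indexes frames, row 1 rows, row 2 columns.
    tt, hh, ww = np.meshgrid(np.arange(t), np.arange(h), np.arange(w), indexing="ij")
    pos = np.stack([tt.ravel(), hh.ravel(), ww.ravel()], axis=0) + start
    return pos.astype(np.int64)                           # shape (3, t*h*w)

text = text_positions(0, 4)                               # 4 prompt tokens
vision = vision_positions(text.max() + 1, 1, 2, 2)        # one 2x2 image grid
position_ids = np.concatenate([text, vision], axis=1)     # (3, seq_len), as built in _pack_outputs
print(position_ids.transpose(1, 0).shape)                 # (seq_len, 3), as returned by pack_outputs
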
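
The shape relations the new unit test asserts can also be checked numerically: stacked pixel rows equal the sum of t*h*w over grid_thw, and image_type_ids carries one entry per temporal slice, i.e. grid_thw[:, 0].sum(). The feature width of 1176 and the 0/1 type flags below are assumed values for illustration only:

import numpy as np

grid_thw = np.array([[1, 4, 6],      # one image: 1 x 4 x 6 patches
                     [3, 4, 6]])     # one video: 3 frames x 4 x 6 patches
images = np.random.rand(int(sum(g.prod() for g in grid_thw)), 1176)  # feature dim is illustrative
image_type_ids = np.repeat([0, 1], grid_thw[:, 0])                   # 0 = image, 1 = video (assumed flags)

assert images.shape[0] == sum(g.prod() for g in grid_thw)
assert image_type_ids.shape[0] == grid_thw[:, 0].sum()
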