# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import math from collections.abc import Iterable from typing import Union import numpy as np from transformers.feature_extraction_utils import BatchFeature from transformers.image_processing_utils import select_best_resolution from transformers.image_utils import ImageInput, get_image_size, to_numpy_array from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack from transformers.tokenization_utils_base import PreTokenizedInput, TextInput from transformers.utils import logging logger = logging.get_logger(__name__) class RProcessorKwargs(ProcessingKwargs, total=False): # see processing_utils.ProcessingKwargs documentation for usage. _defaults = { "text_kwargs": { "padding": False, }, "image_kwargs": {}, "videos_kwargs": {}, } class RProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer", "video_processor"] valid_kwargs = [ "chat_template", "num_image_tokens", "image_processor_type", "vision_feature_select_strategy", "image_token", "video_token", "vision_aspect_ratio", ] image_processor_class = "AutoImageProcessor" tokenizer_class = "AutoTokenizer" video_processor_class = "AutoVideoProcessor" def __init__( self, image_processor=None, tokenizer=None, video_processor=None, num_image_tokens=None, vision_feature_select_strategy=None, chat_template=None, image_token="", video_token="