import warnings from typing import List, Optional, Tuple, Union import numpy as np import PIL import torch from accelerate import Accelerator, DistributedType from accelerate.state import AcceleratorState from decord import VideoReader, cpu from tqdm import tqdm from transformers import ( AutoConfig, AutoProcessor, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration, ) from lmms_eval import utils from lmms_eval.api.instance import Instance from lmms_eval.api.model import lmms from lmms_eval.api.registry import register_model warnings.filterwarnings("ignore") from loguru import logger as eval_logger DEFAULT_IMAGE_TOKEN = "" DEFAULT_VIDEO_TOKEN = "