Adirazgold committed (verified) · Commit b2b95f4 · 1 Parent(s): dc33fbc
README.md DELETED
@@ -1,3 +0,0 @@
- ---
- license: apache-2.0
- ---
 
adapter_config.json DELETED
@@ -1,27 +0,0 @@
- {
- "alpha_pattern": {},
- "base_model_name_or_path": "ibm-granite/granite-vision-3.1-2b-preview",
- "bias": "none",
- "fan_in_fan_out": false,
- "inference_mode": true,
- "init_lora_weights": "gaussian",
- "layer_replication": null,
- "layers_pattern": null,
- "layers_to_transform": null,
- "loftq_config": {},
- "lora_alpha": 32,
- "lora_dropout": 0.1,
- "megatron_config": null,
- "megatron_core": "megatron.core",
- "modules_to_save": [
- "custom_text_proj"
- ],
- "peft_type": "LORA",
- "r": 32,
- "rank_pattern": {},
- "revision": null,
- "target_modules": "(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)",
- "task_type": "FEATURE_EXTRACTION",
- "use_dora": false,
- "use_rslora": false
- }
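For context (not part of this commit), the sketch below shows how a LoRA adapter with the configuration above would typically be attached to the base checkpoint via PEFT; the adapter path is a placeholder, not a file in this repository.

# Minimal sketch, assuming the adapter weights that accompanied this config are
# available locally; "path/to/adapter" is a placeholder.
import torch
from peft import PeftModel
from transformers import LlavaNextForConditionalGeneration

base = LlavaNextForConditionalGeneration.from_pretrained(
    "ibm-granite/granite-vision-3.1-2b-preview",
    torch_dtype=torch.bfloat16,
)
# Applies the LoRA weights (r=32, lora_alpha=32, dropout 0.1) to the language-model
# projection layers matched by `target_modules`, and loads the saved
# "custom_text_proj" module listed in `modules_to_save`.
model = PeftModel.from_pretrained(base, "path/to/adapter").eval()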
 
added_tokens.json DELETED
@@ -1,6 +0,0 @@
- {
- "<image>": 49155,
- "<|end_of_role|>": 49153,
- "<|start_of_role|>": 49152,
- "<|tool_call|>": 49154
- }
 
chat_template.json DELETED
@@ -1,3 +0,0 @@
- {
- "chat_template": "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n{% endfor %}{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|system|>\n' + message['content'][0]['text'] + '\n' }}\n {%- elif message['role'] == 'user' %}<|user|>\n {# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + '\n' }}{% endfor %}\n{%- elif message['role'] == 'assistant' %}\n {{- '<|assistant|>\n' + message['content'][0]['text'] + '<|end_of_text|>' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'][0]['text'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'][0]['text'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|assistant|>\n' }}\n {%- endif %}\n{%- endfor %}"
- }
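As a usage note (not part of this commit), the sketch below shows the message structure this chat template consumes; it assumes a transformers version whose processors expose apply_chat_template, and that the repository is loaded from the current directory as in example_simple.py.

# Sketch only: image entries in a user turn are rendered as "<image>\n" before the text.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(".", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe the image."},
        ],
    }
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
# Roughly: the default system preamble, then "<|user|>\n<image>\nDescribe the image.\n",
# then "<|assistant|>\n" because add_generation_prompt=True.
print(prompt)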
 
custom_llava_next.py DELETED
@@ -1,122 +0,0 @@
- from transformers.models.llava_next.modeling_llava_next import LlavaNextForConditionalGeneration
- import torch
- from transformers.models.llava_next.modeling_llava_next import unpad_image, get_anyres_image_grid_shape, logger
- import numpy as np
-
- class LlavaNextWithCustomPacking(LlavaNextForConditionalGeneration):
-     def pack_image_features(
-         self,
-         image_features,
-         image_sizes,
-         vision_feature_select_strategy,
-         image_newline=None,
-         base_image_feature_location="last",
-     ):
-         """
-         Reshape, unpad and then pack each image_feature into a single image_features tensor containing all visual vectors.
-
-         Args:
-             image_features (`List[torch.Tensor]` of length num_images, each of shape `(num_patches, image_length, embed_dim)`)
-                 List of image feature tensors, each containing all the visual features of all patches.
-             image_sizes (`torch.Tensor` of shape `(num_images, 2)`)
-                 Actual image size of each image (H, W).
-             vision_feature_select_strategy (`str`)
-                 The feature selection strategy used to select the vision feature from the vision backbone.
-             image_newline (`torch.Tensor` of shape `(embed_dim)`)
-                 New line embedding vector.
-         Returns:
-             image_features (`torch.Tensor` of shape `(all_feat_len, embed_dim)`)
-             feature_lens (`List[int]`)
-                 token length of each image in image_features
-         """
-
-
-         new_image_features = []
-         feature_lens = []
-         for image_idx, image_feature in enumerate(image_features):
-             if image_feature.shape[0] > 1:
-                 base_image_feature = image_feature[0]
-                 image_feature = image_feature[1:]
-                 height = width = self.config.vision_config.image_size // self.config.vision_config.patch_size
-
-                 num_patch_height, num_patch_width = get_anyres_image_grid_shape(
-                     image_sizes[image_idx],
-                     self.config.image_grid_pinpoints,
-                     self.config.vision_config.image_size,
-                 )
-
-                 if (
-                     np.prod(image_feature.shape) % (num_patch_height * num_patch_width * height * width) != 0
-                     and vision_feature_select_strategy == "default"
-                 ):
-                     logger.warning_once(
-                         "Image feature shape does not line up with the provided patch size. "
-                         "You may be using the `default` vision_feature_select_strategy with a"
-                         " visual encoder that does not have CLS."
-                     )
-
-                 image_feature = image_feature.view(num_patch_height, num_patch_width, height, width, -1)
-                 image_feature = image_feature.permute(4, 0, 2, 1, 3).contiguous()
-                 image_feature = image_feature.flatten(1, 2).flatten(2, 3)
-                 image_feature = unpad_image(image_feature, image_sizes[image_idx])
-                 if image_newline is not None:
-                     image_feature = torch.cat(
-                         (
-                             image_feature,
-                             image_newline[:, None, None]
-                             .expand(*image_feature.shape[:-1], 1)
-                             .to(image_feature.device, image_feature.dtype),
-                         ),
-                         dim=-1,
-                     )
-                 image_feature = image_feature.flatten(1, 2).transpose(0, 1)
-                 if base_image_feature_location == "last":
-                     image_feature = torch.cat((image_feature, base_image_feature), dim=0)
-                 else:
-                     image_feature = torch.cat((base_image_feature, image_feature), dim=0)
-
-             else:
-                 image_feature = image_feature[0]
-                 if image_newline is not None:
-                     image_feature = torch.cat((image_feature, image_newline[None].to(image_feature)), dim=0)
-             new_image_features.append(image_feature)
-             feature_lens.append(image_feature.size(0))
-         image_features = torch.cat(new_image_features, dim=0)
-         feature_lens = torch.tensor(feature_lens, dtype=torch.long, device=image_features.device)
-         return image_features, feature_lens
-
-
-
- def main():
-     import torch
-     from transformers import AutoConfig
-
-     # Load config and model
-     model_id = "ibm-granite/granite-vision-3.1-2b-preview"
-     config = AutoConfig.from_pretrained(model_id)
-     model = LlavaNextWithCustomPacking.from_pretrained(model_id, config=config)
-
-     # Dummy image features for 2 images (1 base view + 2 patch views each)
-     B = 2  # batch size
-     num_views = 3
-     num_patches = 729
-     embed_dim = model.config.text_config.hidden_size
-     image_features = [
-         torch.randn(num_views, num_patches, embed_dim) for _ in range(B)
-     ]
-     image_sizes = torch.tensor([[384, 384], [384, 384]])  # H, W for each image
-
-     # Call overridden pack_image_features
-     packed_feats, lengths = model.pack_image_features(
-         image_features=image_features,
-         image_sizes=image_sizes,
-         vision_feature_select_strategy="default",
-         image_newline=model.image_newline,
-         base_image_feature_location="last",
-     )
-
-     print("Packed features shape:", packed_feats.shape)
-     print("Feature lengths:", lengths)
-
- if __name__ == "__main__":
-     main()
 
example_simple.py DELETED
@@ -1,72 +0,0 @@
- from PIL import Image
- import requests
- from io import BytesIO
- import torch
- from transformers import AutoModel, AutoProcessor, AutoConfig, AutoModelForVision2Seq
-
- # from granite_cola import ColGraniteVisionConfig, ColGraniteVision, ColGraniteVisionProcessor
-
- # --- 1) Register your custom classes so AutoModel/AutoProcessor work out-of-the-box
- # AutoConfig.register("colgranitevision", ColGraniteVisionConfig)
- # AutoModel.register(ColGraniteVisionConfig, ColGraniteVision)
- # AutoProcessor.register(ColGraniteVisionConfig, ColGraniteVisionProcessor)
-
- # ─────────────────────────────────────────────
- # 2) Load model & processor
- # ─────────────────────────────────────────────
- model_dir = "."
-
- model = AutoModelForVision2Seq.from_pretrained(
-     model_dir,
-     trust_remote_code=True,
-     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
- )
-
- # self.model = PeftModel.from_pretrained(self.model, peft_path).eval()
-
- processor = AutoProcessor.from_pretrained(
-     model_dir,
-     trust_remote_code=True,
-     use_fast=True
- )
-
- # Set patch_size explicitly if needed
- if hasattr(processor, 'patch_size') and processor.patch_size is None:
-     processor.patch_size = 14  # Default patch size for vision transformers
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model = model.to(device).eval()
-
- # ─────────────────────────────────────────────
- # 3) Download sample image + build a prompt containing <image>
- # ─────────────────────────────────────────────
- image_url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg"
- resp = requests.get(image_url)
- image = Image.open(BytesIO(resp.content)).convert("RGB")
-
- # ─────────────────────────────────────────────
- # 4) Process image and text
- # ─────────────────────────────────────────────
- # Process image
- image_inputs = processor.process_images([image])
- image_inputs = {k: v.to(device) for k, v in image_inputs.items()}
-
- # Process text
- text = "A photo of a tiger"
- text_inputs = processor.process_queries([text])
- text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
-
- # ─────────────────────────────────────────────
- # 5) Get embeddings and score
- # ─────────────────────────────────────────────
- with torch.no_grad():
-     # Get image embedding
-     image_embedding = model(**image_inputs)
-
-     # Get text embedding
-     text_embedding = model(**text_inputs)
-
- # Calculate similarity score
- score = torch.matmul(text_embedding, image_embedding.T).item()
-
- print(f"Similarity score between text and image: {score:.4f}")
 
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json DELETED
@@ -1,136 +0,0 @@
- {
- "crop_size": {
- "height": 384,
- "width": 384
- },
- "do_center_crop": true,
- "do_convert_rgb": null,
- "do_normalize": true,
- "do_pad": true,
- "do_rescale": true,
- "do_resize": true,
- "image_grid_pinpoints": [
- [
- 384,
- 768
- ],
- [
- 384,
- 1152
- ],
- [
- 384,
- 1536
- ],
- [
- 384,
- 1920
- ],
- [
- 384,
- 2304
- ],
- [
- 384,
- 2688
- ],
- [
- 384,
- 3072
- ],
- [
- 384,
- 3456
- ],
- [
- 384,
- 3840
- ],
- [
- 768,
- 384
- ],
- [
- 768,
- 768
- ],
- [
- 768,
- 1152
- ],
- [
- 768,
- 1536
- ],
- [
- 768,
- 1920
- ],
- [
- 1152,
- 384
- ],
- [
- 1152,
- 768
- ],
- [
- 1152,
- 1152
- ],
- [
- 1536,
- 384
- ],
- [
- 1536,
- 768
- ],
- [
- 1920,
- 384
- ],
- [
- 1920,
- 768
- ],
- [
- 2304,
- 384
- ],
- [
- 2688,
- 384
- ],
- [
- 3072,
- 384
- ],
- [
- 3456,
- 384
- ],
- [
- 3840,
- 384
- ]
- ],
- "image_mean": [
- 0.5,
- 0.5,
- 0.5
- ],
- "image_processor_type": "LlavaNextImageProcessor",
- "image_std": [
- 0.5,
- 0.5,
- 0.5
- ],
- "processor_class": "ColGraniteVisionProcessor",
- "resample": 3,
- "rescale_factor": 0.00392156862745098,
- "size": {
- "height": 384,
- "width": 384
- }
- }
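For orientation (not part of this commit), the sketch below re-implements, in simplified form, the LLaVA-NeXT-style "anyres" resolution selection that image_grid_pinpoints drives; it illustrates the idea only and is not the exact routine shipped in transformers.

# Simplified illustration of how a best-fit canvas is chosen from
# image_grid_pinpoints: maximize preserved ("effective") resolution, then
# minimize wasted canvas area. Each 384x384 tile of the chosen canvas then
# becomes one crop fed to the vision tower.
from typing import List, Tuple


def select_best_resolution(original_hw: Tuple[int, int], pinpoints: List[Tuple[int, int]]) -> Tuple[int, int]:
    orig_h, orig_w = original_hw
    best_fit, best_effective, least_waste = None, -1, float("inf")
    for h, w in pinpoints:
        scale = min(w / orig_w, h / orig_h)  # fit the image inside the (h, w) canvas
        down_w, down_h = int(orig_w * scale), int(orig_h * scale)
        effective = min(down_w * down_h, orig_w * orig_h)
        waste = w * h - effective
        if effective > best_effective or (effective == best_effective and waste < least_waste):
            best_fit, best_effective, least_waste = (h, w), effective, waste
    return best_fit


# Example with a tall page image and a few of the pinpoints listed above.
print(select_best_resolution((1100, 800), [(384, 768), (768, 384), (768, 768), (1152, 768)]))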
 
processing_colgranitevision.py DELETED
@@ -1,395 +0,0 @@
- import math
- from typing import ClassVar, List, Optional, Tuple, Union
-
- import torch
- from PIL import Image, ImageOps
- from transformers import BatchFeature, LlavaNextProcessor
-
-
- def round_by_factor(number: float, factor: int) -> int:
-     """Returns the closest integer to 'number' that is divisible by 'factor'."""
-     return round(number / factor) * factor
-
-
- def ceil_by_factor(number: float, factor: int) -> int:
-     """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
-     return math.ceil(number / factor) * factor
-
-
- def floor_by_factor(number: float, factor: int) -> int:
-     """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
-     return math.floor(number / factor) * factor
-
-
- class ColGraniteVisionProcessor(LlavaNextProcessor):
-     """
-     Processor for ColGraniteVision.
-     """
-
-     visual_prompt_prefix: ClassVar[str] = "<|user|>\n<image>\nDescribe the image.\n"
-     system_message: ClassVar[
-         str] = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
-     query_prefix: ClassVar[str] = "Query: "
-     query_start: ClassVar[str] = "<|user|>\n"
-
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self.factor = 14
-         self.min_size = 384
-         self.max_size = 384 * 2
-         self.suffix_len = 10
-
-     @property
-     def query_augmentation_token(self) -> str:
-         """
-         Return the query augmentation token.
-         Query augmentation buffers are used as reasoning buffers during inference.
-         """
-         return self.tokenizer.pad_token
-
-     @staticmethod
-     def smart_resize_helper(
-         width: int,
-         height: int,
-         factor: int,
-         min_size: int,
-         max_size: int
-     ) -> Tuple[int, int]:
-         """
-         Returns the resized image dimensions such that:
-         1. The smaller dimension is set to 'min_size'.
-         2. The larger dimension is scaled proportionally to maintain aspect ratio.
-         3. If the larger dimension exceeds 'max_size', it is clipped to 'max_size',
-            and the smaller dimension is adjusted accordingly to maintain aspect ratio.
-         4. Both dimensions are divisible by 'factor'.
-         """
-
-         # Determine scale factor based on min_size
-         if height < width:
-             scale_factor = min_size / height
-         else:
-             scale_factor = min_size / width
-
-         new_width = round(width * scale_factor)
-         new_height = round(height * scale_factor)
-
-         # If the longer dimension exceeds max_size, adjust accordingly
-         if max(new_width, new_height) > max_size:
-             clip_factor = max_size / max(new_width, new_height)
-             new_width = round(new_width * clip_factor)
-             new_height = round(new_height * clip_factor)
-
-         # Ensure dimensions are divisible by factor
-         # new_width = round_by_factor(new_width, factor)
-         # new_height = round_by_factor(new_height, factor)
-
-         return new_width, new_height
-
-     @staticmethod
-     def pad_image_center(image: Image.Image,
-                          target_width: int,
-                          target_height: int,
-                          fill_color=(0, 0, 0)) -> Image.Image:
-         """
-         Pads the given image to be centered within the target dimensions.
-
-         :param image: PIL Image to be padded.
-         :param target_width: The desired width after padding.
-         :param target_height: The desired height after padding.
-         :param fill_color: Background color (default is black).
-         :return: Padded image with centered content.
-         """
-
-         # Get original image size
-         img_width, img_height = image.size
-
-         # Compute padding values
-         pad_left = (target_width - img_width) // 2
-         pad_top = (target_height - img_height) // 2
-         pad_right = target_width - img_width - pad_left
-         pad_bottom = target_height - img_height - pad_top
-
-         # Apply padding
-         padded_image = ImageOps.expand(image, (pad_left, pad_top, pad_right, pad_bottom), fill_color).convert("RGB")
-
-         return padded_image
-
-     def smart_resize(self, image: Image.Image) -> Image.Image:
-         """
-         Resize and convert the image to the required format.
-         """
-         image_size = image.size
-         resized_width, resized_height = self.smart_resize_helper(
-             width=image_size[0],
-             height=image_size[1],
-             factor=self.factor,
-             min_size=self.min_size,
-             max_size=self.max_size
-         )
-         return image.convert("RGB").resize((resized_width, resized_height))
-
-     def smart_resize_and_pad(self, image: Image.Image) -> Image.Image:
-         """
-         Resize and pad the image to the required format.
-         """
-         return self.resize_and_pad_centered(
-             image=image,
-             factor=self.factor,
-             min_size=self.min_size,
-             max_size=self.max_size,
-             fill_color=0
-         )
-
-     def resize_and_pad_centered(self,
-                                 image: Image.Image,
-                                 factor: int,
-                                 min_size: int,
-                                 max_size: int,
-                                 fill_color=0
-                                 ) -> Image.Image:
-         """
-         Resizes and pads an image such that:
-         - The short side is set to `min_size`.
-         - The long side is scaled proportionally but clipped to `max_size`.
-         - The image is centered within the final padded area.
-
-         :param image: PIL Image
-         :param factor: Factor to make dimensions divisible by
-         :param min_size: Minimum size for the short side
-         :param max_size: Maximum allowed size for the long side
-         :param fill_color: Background padding color (default black)
-         :return: Resized and padded image
-         """
-
-         # Get original size
-         width, height = image.size
-
-         if min_size == -1 or max_size == -1:
-             return image.convert("RGB")
-
-         # Determine scale factor based on the short side (min_size)
-         if width < height:
-             scale_factor = min_size / width
-             target_width = min_size
-             max_scale_factor = min(max_size / height, scale_factor)
-             target_height = round(height * max_scale_factor)
-         else:
-             scale_factor = min_size / height
-             target_height = min_size
-             max_scale_factor = min(max_size / width, scale_factor)
-             target_width = round(width * max_scale_factor)
-
-         # Ensure the longer side does not exceed max_size
-         # if max(target_width, target_height) > max_size:
-         #     clip_factor = max_size / max(target_width, target_height)
-         #     target_width = round(target_width * clip_factor)
-         #     target_height = round(target_height * clip_factor)
-
-         # Ensure dimensions are divisible by factor
-         # target_width = round_by_factor(target_width, factor)
-         # target_height = round_by_factor(target_height, factor)
-
-         # Resize the image
-         resized_image = image.resize((target_width, target_height), Image.LANCZOS)
-
-         # Determine final padded dimensions (aligned to short side)
-         if width < height:
-             final_width, final_height = min_size, max_size
-         else:
-             final_width, final_height = max_size, min_size
-
-         # Compute padding to center the image
-         pad_left = (final_width - target_width) // 2
-         pad_top = (final_height - target_height) // 2
-         pad_right = final_width - target_width - pad_left
-         pad_bottom = final_height - target_height - pad_top
-
-         # Apply centered padding
-         # final_image = ImageOps.expand(resized_image, (pad_left, pad_top, pad_right, pad_bottom), fill_color).convert("RGB")
-         final_image = resized_image.convert("RGB")
-
-         return final_image
-
-     def format_data(self, question, image):
-         return [
-             {
-                 "role": "system",
-                 "content": [{"type": "text", "text": self.system_message}],
-             },
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "image",
-                         "image": image,
-                     },
-                     {
-                         "type": "text",
-                         "text": question,
-                     },
-                 ],
-             }
-         ]
-
-     def format_data_wo_role(self, question, image=None):
-         return [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "image",
-                         "image": image,
-                     },
-                     {
-                         "type": "text",
-                         "text": question,
-                     },
-                 ],
-             }
-         ]
-
-     def process_images(
-         self,
-         images: List[Image.Image],
-     ) -> BatchFeature:
-         """
-         Process images for ColGraniteVision.
-         """
-         # texts_doc = [self.apply_chat_template(self.format_data_wo_role(self.visual_prompt_prefix, img),tokenize=False ) for img in images]
-         texts_doc = [self.visual_prompt_prefix for _ in images]
-         images = [self.smart_resize_and_pad(image) for image in images]
-
-         batch_doc = self(
-             text=texts_doc,
-             images=images,
-             return_tensors="pt",
-             padding="longest",
-         )
-         return batch_doc
-
-     def process_queries(self, queries, max_length=2048, suffix=None):
-         if suffix is None:
-             suffix = self.query_augmentation_token * self.suffix_len
-
-         processed = []
-         for q in queries:
-             q = self.query_start + self.query_prefix + q
-             # truncate before it eats actual query content
-             if len(q) + len(suffix) > max_length:
-                 q = q[: max_length - len(suffix) - 1]
-             q += suffix + "\n"
-             processed.append(q)
-
-         return self(
-             text=processed,
-             images=None,
-             return_tensors="pt",
-             padding="longest",
-             truncation=True,
-             max_length=max_length,
-         )
-
-     def score(
-         self,
-         qs: List[torch.Tensor],
-         ps: List[torch.Tensor],
-         device: Optional[Union[str, torch.device]] = None,
-         **kwargs,
-     ) -> torch.Tensor:
-         """
-         Compute the MaxSim score (ColBERT-like) for the given multi-vector query and passage embeddings.
-         """
-         return self.score_multi_vector(qs, ps, device=device, **kwargs)
-
-     def get_n_patches(
-         self,
-         image_size: Tuple[int, int],
-         patch_size: int,
-     ) -> Tuple[int, int]:
-         n_patches_x = self.image_processor.size["width"] // patch_size
-         n_patches_y = self.image_processor.size["height"] // patch_size
-
-         return n_patches_x, n_patches_y
-
-     def get_image_mask(self, batch_images: BatchFeature) -> torch.Tensor:
-         return batch_images.input_ids == self.image_token_id
-
-     @staticmethod
-     def score_single_vector(
-         qs: List[torch.Tensor],
-         ps: List[torch.Tensor],
-         device: Optional[Union[str, torch.device]] = None,
-     ) -> torch.Tensor:
-         """
-         Compute the dot product score for the given single-vector query and passage embeddings.
-         """
-
-         if len(qs) == 0:
-             raise ValueError("No queries provided")
-         if len(ps) == 0:
-             raise ValueError("No passages provided")
-
-         qs_stacked = torch.stack(qs).to(device)
-         ps_stacked = torch.stack(ps).to(device)
-
-         scores = torch.einsum("bd,cd->bc", qs_stacked, ps_stacked)
-         assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}"
-
-         scores = scores.to(torch.float32)
-         return scores
-
-     @staticmethod
-     def score_multi_vector(
-         qs: Union[torch.Tensor, List[torch.Tensor]],
-         ps: Union[torch.Tensor, List[torch.Tensor]],
-         batch_size: int = 128,
-         device: Optional[Union[str, torch.device]] = None,
-     ) -> torch.Tensor:
-         """
-         Compute the late-interaction/MaxSim score (ColBERT-like) for the given multi-vector
-         query embeddings (`qs`) and passage embeddings (`ps`). As in ColPali, a passage is the
-         image of a document page.
-
-         Because the embedding tensors are multi-vector and can thus have different shapes, they
-         should be fed as:
-         (1) a list of tensors, where the i-th tensor is of shape (sequence_length_i, embedding_dim)
-         (2) a single tensor of shape (n_passages, max_sequence_length, embedding_dim) -> usually
-             obtained by padding the list of tensors.
-
-         Args:
-             qs (`Union[torch.Tensor, List[torch.Tensor]]`): Query embeddings.
-             ps (`Union[torch.Tensor, List[torch.Tensor]]`): Passage embeddings.
-             batch_size (`int`, *optional*, defaults to 128): Batch size for computing scores.
-             device (`Union[str, torch.device]`, *optional*): Device to use for computation. If not
-                 provided, uses `get_torch_device("auto")`.
-
-         Returns:
-             `torch.Tensor`: A tensor of shape `(n_queries, n_passages)` containing the scores. The score
-             tensor is saved on the "cpu" device.
-         """
-
-         if len(qs) == 0:
-             raise ValueError("No queries provided")
-         if len(ps) == 0:
-             raise ValueError("No passages provided")
-
-         scores_list: List[torch.Tensor] = []
-
-         for i in range(0, len(qs), batch_size):
-             scores_batch = []
-             qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i: i + batch_size], batch_first=True, padding_value=0).to(
-                 device
-             )
-             for j in range(0, len(ps), batch_size):
-                 ps_batch = torch.nn.utils.rnn.pad_sequence(
-                     ps[j: j + batch_size], batch_first=True, padding_value=0
-                 ).to(device)
-                 scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2))
-             scores_batch = torch.cat(scores_batch, dim=1).cpu()
-             scores_list.append(scores_batch)
-
-         scores = torch.cat(scores_list, dim=0)
-         assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}"
-
-         scores = scores.to(torch.float32)
-         return scores
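As a usage note (not part of this commit), the sketch below wires the processor above into a late-interaction retrieval loop; it assumes the model is loaded as in example_simple.py and returns one embedding per token, and the page image paths are placeholders.

# Minimal sketch, assuming `model(**batch)` returns per-token embeddings of shape
# (batch, seq_len, dim) as example_simple.py does; file paths are placeholders.
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

model = AutoModelForVision2Seq.from_pretrained(".", trust_remote_code=True).eval()
processor = AutoProcessor.from_pretrained(".", trust_remote_code=True)

pages = [Image.open("page1.png"), Image.open("page2.png")]  # placeholder document pages
queries = ["What was the quarterly revenue?"]

with torch.no_grad():
    doc_embs = list(model(**processor.process_images(pages)))      # one (seq_len, dim) tensor per page
    query_embs = list(model(**processor.process_queries(queries)))

# MaxSim / late-interaction scores, shape (n_queries, n_pages)
scores = processor.score_multi_vector(query_embs, doc_embs)
print(scores)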
 
processor_config.json DELETED
@@ -1,12 +0,0 @@
- {
-
- "processor_class": "ColGraniteVisionProcessor",
- "auto_map": {
- "AutoProcessor": "processing_colgranitevision.ColGraniteVisionProcessor"
- },
- "model_type": "llava_next",
- "base_model": "ibm-granite/granite-vision-3.1-2b-preview"
-
-
-
- }
 
save_as_pretrained.py DELETED
@@ -1,70 +0,0 @@
- # import os
- # import json
- # from transformers import AutoProcessor, PreTrainedModel
- # from colgranitevision_config import ColGraniteVisionConfig
- # from modeling_colgranitevision import ColGraniteVision
- from processing_colgranitevision import ColGraniteVisionProcessor
- # # ─────────────────────────────────────────────
- # # 1) Paths & basic config
- # # ─────────────────────────────────────────────
- BASE_MODEL = "ibm-granite/granite-vision-3.1-2b-preview"
- # ADAPTER_PATH = r"C:\Users\Y9F7WJ756\Documents\workspace\colgranite\granite_cola\adapters"
- OUTPUT_DIR = r"/colgranitevision_new"
- #
- # os.makedirs(OUTPUT_DIR, exist_ok=True)
- #
- # # ─────────────────────────────────────────────
- # # 2) Build custom config
- # # ─────────────────────────────────────────────
- # config = ColGraniteVisionConfig(
- #     base_model=BASE_MODEL,
- #     adapter_path=ADAPTER_PATH,
- #     emb_dim_query=128,
- #     emb_dim_doc=128,
- #     torch_dtype="float16",
- # )
- #
- # # Override inherited wrong config
- # config.architectures = ["ColGraniteVision"]
- # config.model_type = "colgranitevision"
- #
- # # Save corrected config
- # with open(os.path.join(OUTPUT_DIR, "../colgranitevision/config.json"), "w") as f:
- #     json.dump(config.to_dict(), f, indent=2)
- #
- # # ─────────────────────────────────────────────
- # # 3) Instantiate model & processor
- # # ─────────────────────────────────────────────
- # model = ColGraniteVision(config)
- processor = ColGraniteVisionProcessor.from_pretrained(BASE_MODEL)
- #
- # # ─────────────────────────────────────────────
- # # 4) Save model, config, and processor
- # # ─────────────────────────────────────────────
- # model.save_pretrained(
- #     OUTPUT_DIR,
- #     safe_serialization=True,
- #     max_shard_size="2GB",
- # )
- processor.save_pretrained(OUTPUT_DIR)
- #
- # print("✅ PEFT-adapted model and processor saved to:", OUTPUT_DIR)
- #
- # # ─────────────────────────────────────────────
- # # 5) Reload test
- # # ─────────────────────────────────────────────
- # try:
- #     from transformers import AutoConfig, AutoModel, AutoProcessor
- #     from granite_cola import granitevision
- #
- #     AutoConfig.register("colgranitevision", ColGraniteVisionConfig)
- #     AutoModel.register(ColGraniteVisionConfig, ColGraniteVision)
- #     AutoProcessor.register(ColGraniteVisionConfig, ColGraniteVisionProcessor)
- #
- #     config = AutoConfig.from_pretrained(OUTPUT_DIR)
- #     model = AutoModel.from_pretrained(OUTPUT_DIR, config=config)
- #     processor = AutoProcessor.from_pretrained(OUTPUT_DIR)
- #
- #     print("🚀 Reload successful:", type(model), type(processor))
- # except Exception as e:
- #     print("❌ Reload test failed:", e)
 
special_tokens_map.json DELETED
@@ -1,35 +0,0 @@
- {
- "additional_special_tokens": [
- "<|start_of_role|>",
- "<|end_of_role|>",
- "<|tool_call|>"
- ],
- "bos_token": {
- "content": "<|end_of_text|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end_of_text|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end_of_text|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "unk_token": {
- "content": "<|end_of_text|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,208 +0,0 @@
- {
- "add_bos_token": false,
- "add_prefix_space": false,
- "added_tokens_decoder": {
- "0": {
- "content": "<|end_of_text|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "1": {
- "content": "<fim_prefix>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "2": {
- "content": "<fim_middle>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "3": {
- "content": "<fim_suffix>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "4": {
- "content": "<fim_pad>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "5": {
- "content": "<filename>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "6": {
- "content": "<gh_stars>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "7": {
- "content": "<issue_start>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "8": {
- "content": "<issue_comment>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "9": {
- "content": "<issue_closed>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "10": {
- "content": "<jupyter_start>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "11": {
- "content": "<jupyter_text>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "12": {
- "content": "<jupyter_code>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "13": {
- "content": "<jupyter_output>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "14": {
- "content": "<empty_output>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "15": {
- "content": "<commit_before>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "16": {
- "content": "<commit_msg>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "17": {
- "content": "<commit_after>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "18": {
- "content": "<reponame>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "49152": {
- "content": "<|start_of_role|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "49153": {
- "content": "<|end_of_role|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "49154": {
- "content": "<|tool_call|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "49155": {
- "content": "<image>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- }
- },
- "additional_special_tokens": [
- "<|start_of_role|>",
- "<|end_of_role|>",
- "<|tool_call|>"
- ],
- "bos_token": "<|end_of_text|>",
- "chat_template": "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n{% endfor %}{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|system|>\n' + message['content'] + '\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|user|>\n' + message['content'] + '\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|assistant|>\n' + message['content'] + '<|end_of_text|>' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|assistant|>\n' }}\n {%- endif %}\n{%- endfor %}",
- "clean_up_tokenization_spaces": true,
- "eos_token": "<|end_of_text|>",
- "errors": "replace",
- "extra_special_tokens": {},
- "model_max_length": 16384,
- "pad_token": "<|end_of_text|>",
- "padding_side": "right",
- "processor_class": "ColGraniteVisionProcessor",
- "tokenizer_class": "GPT2Tokenizer",
- "unk_token": "<|end_of_text|>",
- "vocab_size": 49152
- }
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff