|  | from abc import ABC, abstractmethod | 
					
						
						|  | from typing import List, Optional, Tuple, Union | 
					
						
						|  |  | 
					
						
						|  | import torch | 
					
						
						|  | from PIL import Image | 
					
						
						|  | from transformers import BatchEncoding, BatchFeature | 
					
						
						|  |  | 
					
						
						|  | from .torch_utils import get_torch_device | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class BaseVisualRetrieverProcessor(ABC): | 
					
						
						|  | """ | 
					
						
						|  | Base class for visual retriever processors. | 
					
						
						|  | """ | 
					
						
						|  |  | 
					
						
						|  | @abstractmethod | 
					
						
						|  | def process_images( | 
					
						
						|  | self, | 
					
						
						|  | images: List[Image.Image], | 
					
						
						|  | ) -> Union[BatchFeature, BatchEncoding]: | 
					
						
						|  | pass | 
					
						
						|  |  | 
					
						
						|  | @abstractmethod | 
					
						
						|  | def process_queries( | 
					
						
						|  | self, | 
					
						
						|  | queries: List[str], | 
					
						
						|  | max_length: int = 50, | 
					
						
						|  | suffix: Optional[str] = None, | 
					
						
						|  | ) -> Union[BatchFeature, BatchEncoding]: | 
					
						
						|  | pass | 
					
						
						|  |  | 
					
						
						|  | @abstractmethod | 
					
						
						|  | def score( | 
					
						
						|  | self, | 
					
						
						|  | qs: List[torch.Tensor], | 
					
						
						|  | ps: List[torch.Tensor], | 
					
						
						|  | device: Optional[Union[str, torch.device]] = None, | 
					
						
						|  | **kwargs, | 
					
						
						|  | ) -> torch.Tensor: | 
					
						
						|  | pass | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def score_single_vector( | 
					
						
						|  | qs: List[torch.Tensor], | 
					
						
						|  | ps: List[torch.Tensor], | 
					
						
						|  | device: Optional[Union[str, torch.device]] = None, | 
					
						
						|  | ) -> torch.Tensor: | 
					
						
						|  | """ | 
					
						
						|  | Compute the dot product score for the given single-vector query and passage embeddings. | 
					
						
						|  | """ | 
					
						
						|  | device = device or get_torch_device("auto") | 
					
						
						|  |  | 
					
						
						|  | if len(qs) == 0: | 
					
						
						|  | raise ValueError("No queries provided") | 
					
						
						|  | if len(ps) == 0: | 
					
						
						|  | raise ValueError("No passages provided") | 
					
						
						|  |  | 
					
						
						|  | qs_stacked = torch.stack(qs).to(device) | 
					
						
						|  | ps_stacked = torch.stack(ps).to(device) | 
					
						
						|  |  | 
					
						
						|  | scores = torch.einsum("bd,cd->bc", qs_stacked, ps_stacked) | 
					
						
						|  | assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}" | 
					
						
						|  |  | 
					
						
						|  | scores = scores.to(torch.float32) | 
					
						
						|  | return scores | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def score_multi_vector( | 
					
						
						|  | qs: List[torch.Tensor], | 
					
						
						|  | ps: List[torch.Tensor], | 
					
						
						|  | batch_size: int = 128, | 
					
						
						|  | device: Optional[Union[str, torch.device]] = None, | 
					
						
						|  | ) -> torch.Tensor: | 
					
						
						|  | """ | 
					
						
						|  | Compute the MaxSim score (ColBERT-like) for the given multi-vector query and passage embeddings. | 
					
						
						|  | """ | 
					
						
						|  | device = device or get_torch_device("auto") | 
					
						
						|  |  | 
					
						
						|  | if len(qs) == 0: | 
					
						
						|  | raise ValueError("No queries provided") | 
					
						
						|  | if len(ps) == 0: | 
					
						
						|  | raise ValueError("No passages provided") | 
					
						
						|  |  | 
					
						
						|  | scores_list: List[torch.Tensor] = [] | 
					
						
						|  |  | 
					
						
						|  | for i in range(0, len(qs), batch_size): | 
					
						
						|  | scores_batch = [] | 
					
						
						|  | qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to( | 
					
						
						|  | device | 
					
						
						|  | ) | 
					
						
						|  | for j in range(0, len(ps), batch_size): | 
					
						
						|  | ps_batch = torch.nn.utils.rnn.pad_sequence( | 
					
						
						|  | ps[j : j + batch_size], batch_first=True, padding_value=0 | 
					
						
						|  | ).to(device) | 
					
						
						|  | scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2)) | 
					
						
						|  | scores_batch = torch.cat(scores_batch, dim=1).cpu() | 
					
						
						|  | scores_list.append(scores_batch) | 
					
						
						|  |  | 
					
						
						|  | scores = torch.cat(scores_list, dim=0) | 
					
						
						|  | assert scores.shape[0] == len(qs), f"Expected {len(qs)} scores, got {scores.shape[0]}" | 
					
						
						|  |  | 
					
						
						|  | scores = scores.to(torch.float32) | 
					
						
						|  | return scores | 
					
						
						|  |  | 
					
						
						|  | @abstractmethod | 
					
						
						|  | def get_n_patches( | 
					
						
						|  | self, | 
					
						
						|  | image_size: Tuple[int, int], | 
					
						
						|  | patch_size: int = 14, | 
					
						
						|  | *args, | 
					
						
						|  | **kwargs, | 
					
						
						|  | ) -> Tuple[int, int]: | 
					
						
						|  | """ | 
					
						
						|  | Get the number of patches (n_patches_x, n_patches_y) that will be used to process an | 
					
						
						|  | image of size (height, width) with the given patch size. | 
					
						
						|  | """ | 
					
						
						|  | pass | 
					
						
						|  |  |