Update README.md
Browse files
README.md
CHANGED
@@ -92,4 +92,21 @@ inputs = {key: value.to('cuda') for key, value in inputs.items()}
|
|
92 |
tgt_output = model(tgt=inputs)["tgt_reps"]
|
93 |
print(string, '=', model.compute_similarity(qry_output, tgt_output))
|
94 |
## tensor([[0.3223]], device='cuda:0', dtype=torch.bfloat16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
```
|
|
|
92 |
tgt_output = model(tgt=inputs)["tgt_reps"]
|
93 |
print(string, '=', model.compute_similarity(qry_output, tgt_output))
|
94 |
## tensor([[0.3223]], device='cuda:0', dtype=torch.bfloat16)
|
95 |
+
```
|
96 |
+
|
97 |
+
## Citation
|
98 |
+
```bibtex
|
99 |
+
@article{jiang2024vlm2vec,
|
100 |
+
title={VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks},
|
101 |
+
author={Jiang, Ziyan and Meng, Rui and Yang, Xinyi and Yavuz, Semih and Zhou, Yingbo and Chen, Wenhu},
|
102 |
+
journal={arXiv preprint arXiv:2410.05160},
|
103 |
+
year={2024}
|
104 |
+
}
|
105 |
+
|
106 |
+
@article{meng2025vlm2vecv2,
|
107 |
+
title={VLM2Vec-V2: Advancing Multimodal Embedding for Videos, Images, and Visual Documents},
|
108 |
+
author={Rui Meng and Ziyan Jiang and Ye Liu and Mingyi Su and Xinyi Yang and Yuepeng Fu and Can Qin and Zeyuan Chen and Ran Xu and Caiming Xiong and Yingbo Zhou and Wenhu Chen and Semih Yavuz},
|
109 |
+
journal={arXiv preprint arXiv:2507.04590},
|
110 |
+
year={2025}
|
111 |
+
}
|
112 |
```
|