hanxiao committed on
Commit
e5dae66
·
verified ·
1 Parent(s): 53f5224

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +41 -22
README.md CHANGED
@@ -120,7 +120,46 @@ Here's the speed and quality evaluation on two nano benchmarks. The higher the b
120
  ![](https://raw.githubusercontent.com/jina-ai/jina-embeddings-v4-gguf/refs/heads/main/NanoHotpotQA.svg)
121
  ![](https://raw.githubusercontent.com/jina-ai/jina-embeddings-v4-gguf/refs/heads/main/NanoFiQA2018.svg)
122
 
123
- #### NDCG@5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  | Quantization Type | NanoHotpotQA | NanoFiQA2018 | Δ to v3 (HotpotQA) | Δ to v4 (HotpotQA) | Δ to v3 (FiQA2018) | Δ to v4 (FiQA2018) |
125
  |------------------|--------------|--------------|-------------------|-------------------|-------------------|-------------------|
126
  | IQ1_S | 0.6369 | 0.3178 | -14% | -20% | -38% | -43% |
@@ -144,24 +183,4 @@ Here's the speed and quality evaluation on two nano benchmarks. The higher the b
144
  | jinaai-jina-embeddings-v3 | 0.7393 | 0.5144 | +0% | -7% | +0% | -8% |
145
  | jinaai-jina-embeddings-v4 | 0.7977 | 0.5571 | +8% | +0% | +8% | +0% |
146
 
147
- #### Tokens per Second
148
- | Quantization Type | NanoHotpotQA | NanoFiQA2018 | Δ to F16 (HotpotQA) | Δ to F16 (FiQA2018) |
149
- |------------------|--------------|--------------|--------------------|--------------------|
150
- | IQ1_S | 1608 | 1618 | +53% | +49% |
151
- | IQ1_M | 1553 | 1563 | +48% | +44% |
152
- | IQ2_XXS | 1600 | 1612 | +52% | +49% |
153
- | IQ2_M | 1529 | 1534 | +46% | +42% |
154
- | Q2_K | 1459 | 1471 | +39% | +36% |
155
- | IQ3_XXS | 1552 | 1487 | +48% | +37% |
156
- | IQ3_XS | 1529 | 1526 | +46% | +41% |
157
- | IQ3_S | 1520 | 1516 | +45% | +40% |
158
- | IQ3_M | 1507 | 1511 | +44% | +40% |
159
- | Q3_K_M | 1475 | 1487 | +40% | +37% |
160
- | IQ4_NL | 1464 | 1469 | +39% | +36% |
161
- | IQ4_XS | 1478 | 1487 | +41% | +37% |
162
- | Q4_K_M | 1454 | 1458 | +38% | +35% |
163
- | Q5_K_S | 1419 | 1429 | +35% | +32% |
164
- | Q5_K_M | 1404 | 1433 | +34% | +32% |
165
- | Q6_K | 1356 | 1382 | +29% | +28% |
166
- | Q8_0 | 1304 | 1334 | +24% | +23% |
167
- | F16 | 1050 | 1083 | +0% | +0% |
 
120
  ![](https://raw.githubusercontent.com/jina-ai/jina-embeddings-v4-gguf/refs/heads/main/NanoHotpotQA.svg)
121
  ![](https://raw.githubusercontent.com/jina-ai/jina-embeddings-v4-gguf/refs/heads/main/NanoFiQA2018.svg)
122
 
123
+ #### Table 1: Tokens per Second
124
+
125
+ ```
126
+ llama_context: n_seq_max = 1
127
+ llama_context: n_ctx = 4096
128
+ llama_context: n_ctx_per_seq = 4096
129
+ llama_context: n_batch = 4096
130
+ llama_context: n_ubatch = 4096
131
+ llama_context: causal_attn = 1
132
+ llama_context: flash_attn = 0
133
+ llama_context: kv_unified = true
134
+ llama_context: freq_base = 1000000.0
135
+ llama_context: freq_scale = 1
136
+ llama_context: n_ctx_per_seq (4096) < n_ctx_train (128000) -- the full capacity of the model will not be utilized
137
+
138
+ system_info: n_threads = 4 (n_threads_batch = 4) / 8 | CUDA : ARCHS = 890 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VNNI = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
139
+ ```
140
+
141
+ | Quantization Type | File Size | BPW | NanoHotpotQA | NanoFiQA2018 | Δ to F16 (HotpotQA) | Δ to F16 (FiQA2018) |
142
+ |------------------|-----------|-----|--------------|--------------|--------------------|--------------------|
143
+ | IQ1_S | 748.77 MiB | 2.04 | 1608 | 1618 | +53% | +49% |
144
+ | IQ1_M | 804.97 MiB | 2.19 | 1553 | 1563 | +48% | +44% |
145
+ | IQ2_XXS | 898.64 MiB | 2.44 | 1600 | 1612 | +52% | +49% |
146
+ | IQ2_M | 1.06 GiB | 2.94 | 1529 | 1534 | +46% | +42% |
147
+ | Q2_K | 1.18 GiB | 3.29 | 1459 | 1471 | +39% | +36% |
148
+ | IQ3_XXS | 1.19 GiB | 3.31 | 1552 | 1487 | +48% | +37% |
149
+ | IQ3_XS | 1.29 GiB | 3.59 | 1529 | 1526 | +46% | +41% |
150
+ | IQ3_S | 1.35 GiB | 3.76 | 1520 | 1516 | +45% | +40% |
151
+ | IQ3_M | 1.38 GiB | 3.84 | 1507 | 1511 | +44% | +40% |
152
+ | Q3_K_M | 1.48 GiB | 4.11 | 1475 | 1487 | +40% | +37% |
153
+ | IQ4_NL | 1.69 GiB | 4.72 | 1464 | 1469 | +39% | +36% |
154
+ | IQ4_XS | 1.61 GiB | 4.49 | 1478 | 1487 | +41% | +37% |
155
+ | Q4_K_M | 1.79 GiB | 4.99 | 1454 | 1458 | +38% | +35% |
156
+ | Q5_K_S | 2.02 GiB | 5.61 | 1419 | 1429 | +35% | +32% |
157
+ | Q5_K_M | 2.07 GiB | 5.75 | 1404 | 1433 | +34% | +32% |
158
+ | Q6_K | 2.36 GiB | 6.56 | 1356 | 1382 | +29% | +28% |
159
+ | Q8_0 | 3.05 GiB | 8.50 | 1304 | 1334 | +24% | +23% |
160
+ | F16 | 5.75 GiB | 16.00 | 1050 | 1083 | +0% | +0% |
161
+
162
+ #### Table 2: NDCG@5
163
  | Quantization Type | NanoHotpotQA | NanoFiQA2018 | Δ to v3 (HotpotQA) | Δ to v4 (HotpotQA) | Δ to v3 (FiQA2018) | Δ to v4 (FiQA2018) |
164
  |------------------|--------------|--------------|-------------------|-------------------|-------------------|-------------------|
165
  | IQ1_S | 0.6369 | 0.3178 | -14% | -20% | -38% | -43% |
 
183
  | jinaai-jina-embeddings-v3 | 0.7393 | 0.5144 | +0% | -7% | +0% | -8% |
184
  | jinaai-jina-embeddings-v4 | 0.7977 | 0.5571 | +8% | +0% | +8% | +0% |
185
 
186
+