Commit
·
bf50ae7
1
Parent(s):
26bf998
Add Q8 version
Browse files- .gitattributes +2 -0
- config.json +2 -1
- onnx/model_q8.onnx +3 -0
- onnx/model_q8.onnx_data +3 -0
.gitattributes
CHANGED
@@ -37,3 +37,5 @@ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
|
37 |
onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
38 |
onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
39 |
onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
37 |
onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
38 |
onnx/model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
|
39 |
onnx/model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
|
40 |
+
onnx/model_q8.onnx filter=lfs diff=lfs merge=lfs -text
|
41 |
+
onnx/model_q8.onnx_data filter=lfs diff=lfs merge=lfs -text
|
config.json
CHANGED
@@ -57,8 +57,9 @@
|
|
57 |
"transformers.js_config": {
|
58 |
"kv_cache_dtype": {
|
59 |
"q4f16": "float16",
|
|
|
60 |
"fp16": "float16"
|
61 |
},
|
62 |
"use_external_data_format": true
|
63 |
}
|
64 |
-
}
|
|
|
57 |
"transformers.js_config": {
|
58 |
"kv_cache_dtype": {
|
59 |
"q4f16": "float16",
|
60 |
+
"q8": "float16",
|
61 |
"fp16": "float16"
|
62 |
},
|
63 |
"use_external_data_format": true
|
64 |
}
|
65 |
+
}
|
onnx/model_q8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:277b12b232e7add5ac012dde98257bcd360caa7ab646de3f61a8def198e3502f
|
3 |
+
size 4114804
|
onnx/model_q8.onnx_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a40e83a292b328bb5ba75bb26ffc271e206f11fcee47c3b680e505d2155c0add
|
3 |
+
size 1203494912
|