remisultan committed on
Commit
3e2783d
·
1 Parent(s): 5aa0d88

feat: quantization per channel

Browse files
Files changed (5) hide show
  1. README.md +7 -7
  2. config.json +2 -2
  3. model.onnx +2 -2
  4. model.quant.onnx +2 -2
  5. tokenizer_config.json +1 -2
README.md CHANGED
@@ -38,16 +38,16 @@ This repository provides an ONNX-converted and quantized version of meta-llama/Ll
38
  ## 📥 Evaluation Dataset
39
 
40
  We use [`jackhhao/jailbreak-classification`](https://huggingface.co/datasets/jackhhao/jailbreak-classification)
41
- for the evaluation
42
 
43
  ## 🧪 Evaluation Results
44
 
45
- | Model | Accuracy | Precision | Recall | F1 Score | AUC-ROC | Inference Time |
46
- |-------------------------------|----------|-----------|--------|----------|---------|----------------|
47
- | Llama-Prompt-Guard-2-22M | 0.9569 | 0.9879 | 0.9260 | 0.9559 | 0.9259 | 33s |
48
- | Llama-Prompt-Guard-2-22M-q | 0.9473 | 1.0000 | 0.8956 | 0.9449 | 0.9032 | 29s |
49
- | Llama-Prompt-Guard-2-86M | 0.9770 | 0.9980 | 0.9564 | 0.9767 | 0.9523 | 1m29s |
50
- | Llama-Prompt-Guard-2-86M-q | 0.8937 | 1.0000 | 0.7894 | 0.8823 | 0.7263 | 1m15s |
51
 
52
  ## 🤗 Usage
53
 
 
38
  ## 📥 Evaluation Dataset
39
 
40
  We use [`jackhhao/jailbreak-classification`](https://huggingface.co/datasets/jackhhao/jailbreak-classification)
41
+ for the evaluation (train+test)
42
 
43
  ## 🧪 Evaluation Results
44
 
45
+ | Model | Accuracy | Precision | Recall | F1 Score | AUC-ROC |
46
+ |----------------------------|----------|-----------|--------|----------|---------|
47
+ | Llama-Prompt-Guard-2-22M | 0.9564 | 0.9888 | 0.9249 | 0.9558 | 0.9234 |
48
+ | Llama-Prompt-Guard-2-22M-q | 0.9579 | 0.9967 | 0.9204 | 0.9570 | 0.9180 |
49
+ | Llama-Prompt-Guard-2-86M | 0.9801 | 0.9984 | 0.9625 | 0.9801 | 0.9519 |
50
+ | Llama-Prompt-Guard-2-86M-q | 0.8989 | 1.0000 | 0.8018 | 0.8900 | 0.7452 |
51
 
52
  ## 🤗 Usage
53
 
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "architectures": [
3
  "DebertaV2ForSequenceClassification"
4
  ],
@@ -9,7 +10,6 @@
9
  "initializer_range": 0.02,
10
  "intermediate_size": 3072,
11
  "layer_norm_eps": 1e-07,
12
- "legacy": true,
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
@@ -29,7 +29,7 @@
29
  "relative_attention": true,
30
  "share_att_key": true,
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.51.3",
33
  "type_vocab_size": 0,
34
  "vocab_size": 251000,
35
  "id2label": {
 
1
  {
2
+ "_name_or_path": "meta-llama/Llama-Prompt-Guard-2-86M",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
 
10
  "initializer_range": 0.02,
11
  "intermediate_size": 3072,
12
  "layer_norm_eps": 1e-07,
 
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
 
29
  "relative_attention": true,
30
  "share_att_key": true,
31
  "torch_dtype": "float32",
32
+ "transformers_version": "4.40.2",
33
  "type_vocab_size": 0,
34
  "vocab_size": 251000,
35
  "id2label": {
model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1346eae9dea21209611930d5689cd138989ed06ac7b4b08054040ae22049dce8
3
- size 1116153795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe0be6a471873b6c52f7d6631c16fbddb88ba8c7ab2ba34ca48e8e77ffd9999
3
+ size 1116138537
model.quant.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d267c337c6321f018effbb63dde4f8f6a78b8418f345e049cd5c5727b51bcd4
3
- size 281319107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d6109bea94333a50b05d13bcd9f9cd8f6b16dc245ed59fe665579e31bbe9f2
3
+ size 281716823
tokenizer_config.json CHANGED
@@ -846,7 +846,6 @@
846
  "cls_token": "[CLS]",
847
  "do_lower_case": false,
848
  "eos_token": "[SEP]",
849
- "extra_special_tokens": {},
850
  "mask_token": "[MASK]",
851
  "max_length": 512,
852
  "model_max_length": 1000000000000000019884624838656,
@@ -858,7 +857,7 @@
858
  "sp_model_kwargs": {},
859
  "split_by_punct": false,
860
  "stride": 0,
861
- "tokenizer_class": "DebertaV2TokenizerFast",
862
  "truncation_side": "right",
863
  "truncation_strategy": "longest_first",
864
  "unk_token": "[UNK]",
 
846
  "cls_token": "[CLS]",
847
  "do_lower_case": false,
848
  "eos_token": "[SEP]",
 
849
  "mask_token": "[MASK]",
850
  "max_length": 512,
851
  "model_max_length": 1000000000000000019884624838656,
 
857
  "sp_model_kwargs": {},
858
  "split_by_punct": false,
859
  "stride": 0,
860
+ "tokenizer_class": "DebertaV2Tokenizer",
861
  "truncation_side": "right",
862
  "truncation_strategy": "longest_first",
863
  "unk_token": "[UNK]",