feat: quantization per channel

Files changed (5) hide show

README.md CHANGED Viewed

@@ -38,16 +38,16 @@ This repository provides an ONNX-converted and quantized version of meta-llama/Ll
 ## 📥 Evaluation Dataset
 We use [`jackhhao/jailbreak-classification`](https://huggingface.co/datasets/jackhhao/jailbreak-classification)
-for the evaluation
 ## 🧪 Evaluation Results
-| Model                         | Accuracy | Precision | Recall | F1 Score | AUC-ROC | Inference Time |
-|-------------------------------|----------|-----------|--------|----------|---------|----------------|
-| Llama-Prompt-Guard-2-22M      | 0.9569   | 0.9879    | 0.9260 | 0.9559   | 0.9259  | 33s            |
-| Llama-Prompt-Guard-2-22M-q    | 0.9473   | 1.0000    | 0.8956 | 0.9449   | 0.9032  | 29s            |
-| Llama-Prompt-Guard-2-86M      | 0.9770   | 0.9980    | 0.9564 | 0.9767   | 0.9523  | 1m29s          |
-| Llama-Prompt-Guard-2-86M-q    | 0.8937   | 1.0000    | 0.7894 | 0.8823   | 0.7263  | 1m15s          |
 ## 🤗 Usage

 ## 📥 Evaluation Dataset
 We use [`jackhhao/jailbreak-classification`](https://huggingface.co/datasets/jackhhao/jailbreak-classification)
+for the evaluation (train+test)
 ## 🧪 Evaluation Results
+| Model                      | Accuracy | Precision | Recall | F1 Score | AUC-ROC |
+|----------------------------|----------|-----------|--------|----------|---------|
+| Llama-Prompt-Guard-2-22M   | 0.9564   | 0.9888    | 0.9249 | 0.9558   | 0.9234  |
+| Llama-Prompt-Guard-2-22M-q | 0.9579   | 0.9967    | 0.9204 | 0.9570   | 0.9180  |
+| Llama-Prompt-Guard-2-86M   | 0.9801   | 0.9984    | 0.9625 | 0.9801   | 0.9519  |
+| Llama-Prompt-Guard-2-86M-q | 0.8989   | 1.0000    | 0.8018 | 0.8900   | 0.7452  |
 ## 🤗 Usage

config.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
@@ -9,7 +10,6 @@
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-07,
-  "legacy": true,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
@@ -29,7 +29,7 @@
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.51.3",
   "type_vocab_size": 0,
   "vocab_size": 251000,
   "id2label": {

 {
+  "_name_or_path": "meta-llama/Llama-Prompt-Guard-2-86M",
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
   "relative_attention": true,
   "share_att_key": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.40.2",
   "type_vocab_size": 0,
   "vocab_size": 251000,
   "id2label": {

model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1346eae9dea21209611930d5689cd138989ed06ac7b4b08054040ae22049dce8
-size 1116153795

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbe0be6a471873b6c52f7d6631c16fbddb88ba8c7ab2ba34ca48e8e77ffd9999
+size 1116138537

model.quant.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d267c337c6321f018effbb63dde4f8f6a78b8418f345e049cd5c5727b51bcd4
-size 281319107

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9d6109bea94333a50b05d13bcd9f9cd8f6b16dc245ed59fe665579e31bbe9f2
+size 281716823

tokenizer_config.json CHANGED Viewed

@@ -846,7 +846,6 @@
   "cls_token": "[CLS]",
   "do_lower_case": false,
   "eos_token": "[SEP]",
-  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "max_length": 512,
   "model_max_length": 1000000000000000019884624838656,
@@ -858,7 +857,7 @@
   "sp_model_kwargs": {},
   "split_by_punct": false,
   "stride": 0,
-  "tokenizer_class": "DebertaV2TokenizerFast",
   "truncation_side": "right",
   "truncation_strategy": "longest_first",
   "unk_token": "[UNK]",

   "cls_token": "[CLS]",
   "do_lower_case": false,
   "eos_token": "[SEP]",
   "mask_token": "[MASK]",
   "max_length": 512,
   "model_max_length": 1000000000000000019884624838656,
   "sp_model_kwargs": {},
   "split_by_punct": false,
   "stride": 0,
+  "tokenizer_class": "DebertaV2Tokenizer",
   "truncation_side": "right",
   "truncation_strategy": "longest_first",
   "unk_token": "[UNK]",