weiweiz1
/

Llama-3.2-1B-Instruct-NVFP4-W4A4-RTN

8-bit precision

compressed-tensors

Model card Files and versions

weiweiz1 committed on Jul 30

Commit

7bb507c

·

verified ·

1 Parent(s): 7a87df3

Update config.json

Files changed (1) hide show

config.json +40 -21

config.json CHANGED Viewed

@@ -23,27 +23,46 @@
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "quantization_config": {
-    "act_bits": 4,
-    "act_data_type": "nv_fp4_with_static_gs",
-    "act_dynamic": true,
-    "act_group_size": 16,
-    "act_sym": true,
-    "autoround_version": "0.6.1.dev",
-    "batch_size": 4,
-    "bits": 4,
-    "data_type": "nv_fp4",
-    "group_size": 16,
-    "iters": 0,
-    "packing_format": "nv_fp",
-    "quant_method": "auto-round",
-    "scale_calculation_mode": [
-      "even"
     ],
-    "scale_format": [
-      "e8m0"
-    ],
-    "seqlen": 512,
-    "sym": true
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -56,7 +75,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.4",
   "use_cache": true,
   "vocab_size": 128256
 }

   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": "local",
+          "group_size": 16,
+          "num_bits": 4,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor_group",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": 16,
+          "num_bits": 4,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor_group",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "nvfp4-pack-quantized",
+    "global_compression_ratio": null,
+    "ignore": [
+      "lm_head"
     ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
   "rope_theta": 500000.0,
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
   "use_cache": true,
   "vocab_size": 128256
 }