weiweiz1 committed on
Commit
7bb507c
·
verified ·
1 Parent(s): 7a87df3

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +40 -21
config.json CHANGED
@@ -23,27 +23,46 @@
23
  "num_key_value_heads": 8,
24
  "pretraining_tp": 1,
25
  "quantization_config": {
26
- "act_bits": 4,
27
- "act_data_type": "nv_fp4_with_static_gs",
28
- "act_dynamic": true,
29
- "act_group_size": 16,
30
- "act_sym": true,
31
- "autoround_version": "0.6.1.dev",
32
- "batch_size": 4,
33
- "bits": 4,
34
- "data_type": "nv_fp4",
35
- "group_size": 16,
36
- "iters": 0,
37
- "packing_format": "nv_fp",
38
- "quant_method": "auto-round",
39
- "scale_calculation_mode": [
40
- "even"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  ],
42
- "scale_format": [
43
- "e8m0"
44
- ],
45
- "seqlen": 512,
46
- "sym": true
47
  },
48
  "rms_norm_eps": 1e-05,
49
  "rope_scaling": {
@@ -56,7 +75,7 @@
56
  "rope_theta": 500000.0,
57
  "tie_word_embeddings": true,
58
  "torch_dtype": "bfloat16",
59
- "transformers_version": "4.52.4",
60
  "use_cache": true,
61
  "vocab_size": 128256
62
  }
 
23
  "num_key_value_heads": 8,
24
  "pretraining_tp": 1,
25
  "quantization_config": {
26
+ "config_groups": {
27
+ "group_0": {
28
+ "input_activations": {
29
+ "actorder": null,
30
+ "block_structure": null,
31
+ "dynamic": "local",
32
+ "group_size": 16,
33
+ "num_bits": 4,
34
+ "observer": "minmax",
35
+ "observer_kwargs": {},
36
+ "strategy": "tensor_group",
37
+ "symmetric": true,
38
+ "type": "float"
39
+ },
40
+ "output_activations": null,
41
+ "targets": [
42
+ "Linear"
43
+ ],
44
+ "weights": {
45
+ "actorder": null,
46
+ "block_structure": null,
47
+ "dynamic": false,
48
+ "group_size": 16,
49
+ "num_bits": 4,
50
+ "observer": "minmax",
51
+ "observer_kwargs": {},
52
+ "strategy": "tensor_group",
53
+ "symmetric": true,
54
+ "type": "float"
55
+ }
56
+ }
57
+ },
58
+ "format": "nvfp4-pack-quantized",
59
+ "global_compression_ratio": null,
60
+ "ignore": [
61
+ "lm_head"
62
  ],
63
+ "kv_cache_scheme": null,
64
+ "quant_method": "compressed-tensors",
65
+ "quantization_status": "compressed"
 
 
66
  },
67
  "rms_norm_eps": 1e-05,
68
  "rope_scaling": {
 
75
  "rope_theta": 500000.0,
76
  "tie_word_embeddings": true,
77
  "torch_dtype": "bfloat16",
78
+ "transformers_version": "4.51.3",
79
  "use_cache": true,
80
  "vocab_size": 128256
81
  }