Llama-3.2-1B-Instruct-NVFP4-W4A4-RTN / quantization_config.json

Upload folder using huggingface_hub

7a87df3 verified 3 months ago

431 Bytes

	{
	"bits": 4,
	"group_size": 16,
	"sym": true,
	"data_type": "nv_fp4",
	"seqlen": 512,
	"batch_size": 4,
	"iters": 0,
	"act_bits": 4,
	"act_group_size": 16,
	"act_sym": true,
	"act_dynamic": true,
	"act_data_type": "nv_fp4_with_static_gs",
	"autoround_version": "0.6.1.dev",
	"quant_method": "auto-round",
	"packing_format": "nv_fp",
	"scale_format": [
	"e8m0"
	],
	"scale_calculation_mode": [
	"even"
	]
	}