# Recipe with a single stage that declares two modifier groups, in order:
# `sparsity_modifiers` first, then `quantization_modifiers`.
# NOTE(review): the nesting below is reconstructed from key order (stage ->
# modifier group -> modifier -> parameters); confirm it against the tool that
# consumes this recipe.
quantization_stage:
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5
      # Kept quoted: an unquoted 2:4 can be read as a base-60 integer by
      # YAML 1.1 loaders instead of the string "2:4".
      mask_structure: '2:4'
      sequential_update: false
      sequential_targets: [LlamaDecoderLayer]
      targets: [Linear]
      # NOTE(review): the 're:' prefix presumably marks a regular expression;
      # the quantization modifier below uses the bare name `lm_head` instead —
      # confirm both forms are accepted by the consumer.
      ignore: ['re:.*lm_head']
      block_size: 128
      dampening_frac: 0.01
      preserve_sparsity_mask: false
      offload_hessians: false
  quantization_modifiers:
    QuantizationModifier:
      targets: [Linear]
      ignore: [lm_head]
      scheme: FP8_DYNAMIC