quantization_stage:
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5                             # prune 50% of weights in each targeted layer
      mask_structure: "2:4"                     # semi-structured 2:4 sparsity: 2 of every 4 contiguous weights zeroed
      sequential_update: false
      sequential_targets: [LlamaDecoderLayer]   # module class treated as one unit during sequential calibration
      targets: [Linear]                         # apply pruning to all Linear modules
      ignore: ['re:.*lm_head']                  # regex match: leave the LM head dense
      block_size: 128                           # column block size used by the SparseGPT solver
      dampening_frac: 0.01                      # fraction added to the Hessian diagonal for numerical stability
      preserve_sparsity_mask: false             # do not carry over a pre-existing sparsity mask
      offload_hessians: false                   # keep Hessians on the accelerator rather than offloading to CPU
  quantization_modifiers:
    QuantizationModifier:
      targets: [Linear]
      ignore: [lm_head]                         # keep the LM head at original precision
      scheme: FP8_DYNAMIC                       # FP8 weights with dynamic per-token FP8 activations
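# Usage sketch, kept as comments so this file stays a valid recipe. It assumes
# llm-compressor's `oneshot` entry point, which applies a recipe like the one
# above in a single calibration pass. The model ID, dataset alias, sample
# count, sequence length, and output directory are illustrative assumptions,
# not part of this recipe.
#
#     from llmcompressor import oneshot
#
#     oneshot(
#         model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical base model
#         recipe="recipe.yaml",                      # path to this file (assumed filename)
#         dataset="open_platypus",                   # example calibration dataset alias
#         num_calibration_samples=512,               # example calibration set size
#         max_seq_length=2048,                       # example calibration sequence length
#         output_dir="Llama-3.1-8B-2of4-FP8",        # hypothetical output directory
#     )
#
# SparseGPT needs calibration data to estimate Hessians, while FP8_DYNAMIC
# quantization computes activation scales at runtime, so a single modest
# calibration set serves the whole stage.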