# Qwen3.6-27B-NVFP4A16 / recipe.yaml
name: nvfp4a16
scheme: NVFP4A16  # FP4 weights, bf16 activations; skipping activation quantization gives much lower KLD than quantizing activations too
engine: llmcompressor
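# Background note (added, not in the original recipe): NVFP4 stores weights as
# FP4 (e2m1) values in groups of 16, each group carrying an FP8 (e4m3) scale,
# with a per-tensor FP32 global scale on top; the "A16" suffix leaves
# activations in 16-bit, which is why no activation quantization settings
# appear anywhere below.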
# A16 doesn't need activation calibration; 128 short samples is enough for weight stats.
calibration:
  dataset: neuralmagic/calibration
  config: LLM
  split: train
  num_samples: 128
  max_seq_length: 2048
ignore:
  - lm_head                         # output head stays in bf16
  - "re:.*visual.*"                 # vision tower stays in bf16
  - "re:.*linear_attn.*"            # entire SSM block (in_proj_qkvz, in_proj_ba, conv1d, out_proj) kept in bf16
  - "re:.*mlp.gate$"                # MoE router gate
  - "re:.*mlp.shared_expert_gate$"  # shared-expert gate
  - "re:.*mtp.*"                    # multi-token-prediction layers
export:
  save_compressed: true
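# ---------------------------------------------------------------------------
# Usage sketch (added comment, not part of the recipe): this key layout is a
# wrapper around llmcompressor rather than its native modifier syntax, so the
# mapping below onto the oneshot entrypoint is an assumption; MODEL_ID and
# SAVE_DIR are hypothetical placeholders.
#
#   from transformers import AutoModelForCausalLM
#   from llmcompressor import oneshot
#   from llmcompressor.modifiers.quantization import QuantizationModifier
#
#   model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
#   recipe = QuantizationModifier(
#       targets="Linear",
#       scheme="NVFP4A16",
#       ignore=["lm_head", "re:.*visual.*", "re:.*linear_attn.*",
#               "re:.*mlp.gate$", "re:.*mlp.shared_expert_gate$", "re:.*mtp.*"],
#   )
#   oneshot(
#       model=model,
#       dataset="neuralmagic/calibration",
#       dataset_config_name="LLM",
#       splits={"calibration": "train"},
#       recipe=recipe,
#       num_calibration_samples=128,
#       max_seq_length=2048,
#   )
#   model.save_pretrained(SAVE_DIR, save_compressed=True)  # export.save_compressed
# ---------------------------------------------------------------------------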