YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

Creation

from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.awq import AWQModifier
from llmcompressor import oneshot
from datasets import load_dataset

# --- Model & tokenizer setup ---
# Source checkpoint on the Hugging Face Hub.
model_stub = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Bare model name (last path segment) — used to build the save path below.
model_name = model_stub.split("/")[-1]
# Calibration settings fed to oneshot(): sample count and max sequence length.
num_samples = 512
max_seq_len = 2048
tokenizer = AutoTokenizer.from_pretrained(model_stub)
model = AutoModelForCausalLM.from_pretrained(
    model_stub,
    device_map="auto",   # place layers across available devices automatically
    torch_dtype="auto",  # keep the checkpoint's native dtype
)


def preprocess_fn(example):
    """Render one calibration example's chat messages into a plain-text prompt.

    Applies the tokenizer's chat template to ``example["messages"]`` without
    appending a generation prompt and without tokenizing, and returns the
    rendered string under the ``"text"`` key expected by the calibration step.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return {"text": rendered}


# Calibration dataset: chat-style samples for LLM compression calibration.
# map() adds a "text" column holding each example's rendered chat template.
ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
ds = ds.map(preprocess_fn)


# Quantization recipe: AWQ with 4-bit asymmetric weights / 16-bit activations
# on all Linear layers, leaving the output head (lm_head) unquantized.
recipe = [
    AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"]),
]

# Run one-shot (post-training) quantization: calibrates on `num_samples`
# examples from `ds`, truncated to `max_seq_len` tokens, applying `recipe`
# to `model` in place.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=max_seq_len,
    num_calibration_samples=num_samples,
)

# Persist the quantized model and its tokenizer side by side, e.g.
# "DeepSeek-R1-Distill-Qwen-1.5B-quantized.w4a16".
save_path = model_name + "-quantized.w4a16"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
Downloads last month
5
Safetensors
Model size
0.6B params
Tensor type
I64
·
I32
·
BF16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support