YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

Creation

from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.awq import AWQModifier
from llmcompressor import oneshot
from datasets import load_dataset

# --- Model & tokenizer setup ---
# Source checkpoint on the Hugging Face Hub.
model_stub = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Bare model name (last path segment) — used to build the save path below.
model_name = model_stub.split("/")[-1]
# Calibration settings fed to oneshot(): sample count and max sequence length.
num_samples = 512
max_seq_len = 2048
tokenizer = AutoTokenizer.from_pretrained(model_stub)
model = AutoModelForCausalLM.from_pretrained(
    model_stub,
    device_map="auto",   # place layers across available devices automatically
    torch_dtype="auto",  # keep the checkpoint's native dtype
)


def preprocess_fn(example):
    """Render one calibration example's chat messages into a plain-text prompt.

    Applies the tokenizer's chat template to ``example["messages"]`` without
    appending a generation prompt and without tokenizing, and returns the
    rendered string under the ``"text"`` key expected by the calibration step.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        add_generation_prompt=False,
        tokenize=False,
    )
    return {"text": rendered}


# Calibration dataset: chat-style samples for LLM compression calibration.
# map() adds a "text" column holding each example's rendered chat template.
ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
ds = ds.map(preprocess_fn)


# Quantization recipe: AWQ with 4-bit asymmetric weights / 16-bit activations
# on all Linear layers, leaving the output head (lm_head) unquantized.
recipe = [
    AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"]),
]

# Run one-shot (post-training) quantization: calibrates on `num_samples`
# examples from `ds`, truncated to `max_seq_len` tokens, applying `recipe`
# to `model` in place.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=max_seq_len,
    num_calibration_samples=num_samples,
)

# Persist the quantized model and its tokenizer side by side, e.g.
# "DeepSeek-R1-Distill-Qwen-1.5B-quantized.w4a16".
save_path = model_name + "-quantized.w4a16"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
Downloads last month
5
Safetensors
Model size
0.6B params
Tensor type
I64
·
I32
·
BF16
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support