YAML Metadata Warning: empty or missing YAML metadata in repo card
Check out the documentation for more information.
Creation
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.awq import AWQModifier
from llmcompressor import oneshot
from datasets import load_dataset
# Source checkpoint on the Hugging Face Hub and calibration hyperparameters.
model_stub = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model_name = model_stub.split("/")[-1]  # repo name only; used to build the save path below
num_samples = 512  # number of calibration samples passed to oneshot()
max_seq_len = 2048  # max sequence length used during calibration

# Load tokenizer and model; device_map/torch_dtype "auto" lets transformers
# pick placement and keep the checkpoint's native dtype.
tokenizer = AutoTokenizer.from_pretrained(model_stub)
model = AutoModelForCausalLM.from_pretrained(
    model_stub,
    device_map="auto",
    torch_dtype="auto",
)
def preprocess_fn(example):
    """Render one chat-format example into a plain-text string.

    Applies the model's chat template to ``example["messages"]`` without
    tokenizing and without appending a generation prompt, returning a dict
    with a single ``"text"`` field (the column oneshot() calibrates on).
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"], add_generation_prompt=False, tokenize=False
    )
    return {"text": rendered}
# Calibration data: chat transcripts mapped to plain text via the chat template.
ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
ds = ds.map(preprocess_fn)

# AWQ recipe: 4-bit asymmetric weights / 16-bit activations on all Linear
# layers, leaving the lm_head unquantized.
recipe = [
    AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"]),
]

# One-shot (post-training) quantization using the calibration samples above;
# the model is modified in place.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=max_seq_len,
    num_calibration_samples=num_samples,
)

# Persist the quantized model and its tokenizer together, e.g.
# "DeepSeek-R1-Distill-Qwen-1.5B-quantized.w4a16".
save_path = model_name + "-quantized.w4a16"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
- Downloads last month
- 5
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support