Feature Extraction
Transformers
PyTorch
scaling_law_forecaster
scaling-laws
neural-scaling
performance-prediction
configuration-to-performance
custom_code
Instructions to use OptimizerStudy/NCPL-final with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OptimizerStudy/NCPL-final with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="OptimizerStudy/NCPL-final", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("OptimizerStudy/NCPL-final", trust_remote_code=True, dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| import torch | |
| import torch.nn as nn | |
| from transformers import AutoModel, AutoConfig | |
class ScalingLawForecaster(nn.Module):
    """Backbone transformer that emits one scalar prediction per input token.

    Each position is embedded either by the backbone's own token embedding
    table or, where ``is_number_mask`` is set, by a small MLP applied to the
    raw numeric value at that position. The mixed embeddings are run through
    the backbone and a linear head maps every hidden state to a scalar.
    """

    # Token id that ``forward`` remaps to 0 before the embedding lookup.
    # NOTE(review): presumably a numeric-placeholder id that lies outside the
    # backbone's embedding table -- confirm against the tokenizer in use.
    NUMBER_PLACEHOLDER_TOKEN_ID = 49152

    def __init__(
        self,
        base_model_name: str = "HuggingFaceTB/SmolLM2-135M",
        init_from_pretrained: bool = True,
        force_fp32: bool = False,
    ):
        """Build the backbone, the numeric-value MLP and the scalar head.

        Args:
            base_model_name: Model id or path handed to ``AutoConfig`` /
                ``AutoModel``.
            init_from_pretrained: Load pretrained backbone weights when true;
                otherwise build the backbone from its config only.
            force_fp32: Set ``torch_dtype`` to float32 on the config and, when
                loading pretrained weights, request float32 explicitly.
        """
        super().__init__()
        self.config = AutoConfig.from_pretrained(base_model_name)
        if force_fp32:
            self.config.torch_dtype = torch.float32

        if init_from_pretrained:
            # Only pass torch_dtype when fp32 is forced, so the default
            # loading behaviour is otherwise left untouched.
            load_kwargs = {"torch_dtype": torch.float32} if force_fp32 else {}
            self.base = AutoModel.from_pretrained(
                base_model_name,
                config=self.config,
                **load_kwargs,
            )
        else:
            self.base = AutoModel.from_config(self.config)

        hidden_size = self.config.hidden_size
        # Maps a single scalar value to a vector of the backbone's hidden size.
        self.num_mlp = nn.Sequential(
            nn.Linear(1, hidden_size * 2),
            nn.ReLU(),
            nn.Linear(hidden_size * 2, hidden_size),
        )
        self.head = nn.Linear(hidden_size, 1)

    def forward(
        self,
        input_ids: torch.LongTensor,
        is_number_mask: torch.BoolTensor,
        number_values_filled: torch.FloatTensor,
        attention_mask: torch.BoolTensor = None,
    ) -> torch.FloatTensor:
        """Predict one scalar per token.

        Args:
            input_ids: (batch, seq_len) token ids. Not modified by this call.
            is_number_mask: (batch, seq_len) bool mask for numeric tokens.
            number_values_filled: (batch, seq_len) float values (0 for
                non-numeric positions).
            attention_mask: (batch, seq_len) optional mask forwarded to the
                backbone.

        Returns:
            (batch, seq_len) tensor of scalar predictions, one per token.
        """
        # Remap the placeholder id on a copy: the original in-place assignment
        # silently rewrote the caller's tensor.
        placeholder_positions = input_ids == self.NUMBER_PLACEHOLDER_TOKEN_ID
        safe_ids = input_ids.masked_fill(placeholder_positions, 0)
        text_emb = self.base.get_input_embeddings()(safe_ids)

        # Numeric MLP embeddings: one row per token, one feature per row.
        # Casting to the MLP's parameter dtype is a no-op when dtypes already
        # agree and avoids a dtype mismatch when the model runs in reduced
        # precision.
        mlp_dtype = next(self.num_mlp.parameters()).dtype
        flat_vals = number_values_filled.reshape(-1, 1).to(mlp_dtype)
        mlp_out = self.num_mlp(flat_vals).view_as(text_emb).to(text_emb.dtype)

        # Numeric positions take the MLP embedding, all others the text one.
        mask = is_number_mask.unsqueeze(-1)
        inputs_embeds = torch.where(mask, mlp_out, text_emb)

        outputs = self.base(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            return_dict=True,
        )
        hidden = outputs.last_hidden_state

        # Final scalar head: (batch, seq_len, hidden) -> (batch, seq_len).
        return self.head(hidden).squeeze(-1)