Instructions to use TensorStack/Components with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use TensorStack/Components with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("TensorStack/Components", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
Upload 6 files
Browse files
- AceStepXL_AudioDetokenizer/config.json +21 -0
- AceStepXL_AudioDetokenizer/diffusion_pytorch_model.safetensors +3 -0
- AceStepXL_AudioTokenizer/config.json +31 -0
- AceStepXL_AudioTokenizer/diffusion_pytorch_model.safetensors +3 -0
- AceStepXL_ConditionEncoder/config.json +19 -0
- AceStepXL_ConditionEncoder/diffusion_pytorch_model.safetensors +3 -0
AceStepXL_AudioDetokenizer/config.json
ADDED
|
@@ -0,0 +1,21 @@
+{
+  "_class_name": "AceStepAudioTokenDetokenizer",
+  "_diffusers_version": "0.38.0.dev0",
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "audio_acoustic_hidden_dim": 64,
+  "head_dim": 128,
+  "hidden_size": 2048,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "sliding_attention",
+    "full_attention"
+  ],
+  "num_attention_heads": 16,
+  "num_attention_pooler_hidden_layers": 2,
+  "num_key_value_heads": 8,
+  "pool_window_size": 5,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000,
+  "sliding_window": 128
+}
AceStepXL_AudioDetokenizer/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:744612a9406114fc5b3661e3682f9bd835dc737538b9777124c874303bbbb837
+size 210026424
AceStepXL_AudioTokenizer/config.json
ADDED
|
@@ -0,0 +1,31 @@
+{
+  "_class_name": "AceStepAudioTokenizer",
+  "_diffusers_version": "0.38.0.dev0",
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "audio_acoustic_hidden_dim": 64,
+  "fsq_dim": 2048,
+  "fsq_input_levels": [
+    8,
+    8,
+    8,
+    5,
+    5,
+    5
+  ],
+  "fsq_input_num_quantizers": 1,
+  "head_dim": 128,
+  "hidden_size": 2048,
+  "intermediate_size": 6144,
+  "layer_types": [
+    "sliding_attention",
+    "full_attention"
+  ],
+  "num_attention_heads": 16,
+  "num_attention_pooler_hidden_layers": 2,
+  "num_key_value_heads": 8,
+  "pool_window_size": 5,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000,
+  "sliding_window": 128
+}
AceStepXL_AudioTokenizer/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f46814109ba8bb451fef2122cb7ff58093856a9f2e5464e32f06b71404973b
+size 210068124
AceStepXL_ConditionEncoder/config.json
ADDED
|
@@ -0,0 +1,19 @@
+{
+  "_class_name": "AceStepConditionEncoder",
+  "_diffusers_version": "0.38.0.dev0",
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "head_dim": 128,
+  "hidden_size": 2048,
+  "intermediate_size": 6144,
+  "layer_types": null,
+  "num_attention_heads": 16,
+  "num_key_value_heads": 8,
+  "num_lyric_encoder_hidden_layers": 8,
+  "num_timbre_encoder_hidden_layers": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000,
+  "sliding_window": 128,
+  "text_hidden_dim": 1024,
+  "timbre_hidden_dim": 64
+}
AceStepXL_ConditionEncoder/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca111ca95858c8d3c3678c986dda03df20778c0de009f57109c1052866d522b2
+size 1218676312