{
  "hidden_size": 768,
  "num_attention_heads": 6,
  "attention_head_dim": 128,
  "in_channels": 16,
  "patch_size": 1,
  "joint_attention_dim": 768,
  "pooled_projection_dim": 768,
  "num_double_layers": 2,
  "num_single_layers": 4,
  "mlp_ratio": 4.0,
  "axes_dims_rope": [
    16,
    56,
    56
  ],
  "use_lune_expert": true,
  "lune_expert_dim": 1280,
  "lune_hidden_dim": 512,
  "lune_dropout": 0.1,
  "freeze_lune": false,
  "use_sol_prior": true,
  "sol_spatial_size": 8,
  "sol_hidden_dim": 256,
  "sol_geometric_weight": 0.7,
  "freeze_sol": false,
  "use_t5_vec": true
}