{
  "architectures": [
    "TRMForCausalLM"
  ],
  "dtype": "bfloat16",
  "model_type": "trm",
  "pad_token_id": 50283,
  "transformers_version": "4.57.1",
  "trm_config_dict": {
    "H_layers": 0,
    "batch_size": 1,
    "causal": true,
    "dropout": 0.2,
    "expansion": 4,
    "forward_dtype": "bfloat16",
    "freeze_embeddings": false,
    "halt_exploration_prob": 0.1,
    "halt_max_steps": 8,
    "hidden_size": 768,
    "loss": {
      "loss_type": "stablemax_cross_entropy",
      "name": "losses@ACTLossHead"
    },
    "name": "recursive_reasoning.trm@TinyRecursiveModel",
    "no_ACT_continue": true,
    "num_heads": 12,
    "num_layers": 8,
    "num_puzzle_identifiers": 1,
    "pos_encodings": "rope",
    "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base",
    "puzzle_emb_len": 0,
    "puzzle_emb_ndim": 0,
    "residual_recursion": false,
    "seq_len": 512,
    "vocab_size": 50368,
    "y_cycles": 2,
    "z_cycles": 3
  }
}
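
Because "trm" is not a model type registered in the transformers library, loading a checkpoint with this config requires the repository's own modeling code. The sketch below is a minimal, non-authoritative example of how such a checkpoint could be loaded; the repo id "your-org/trm-checkpoint" is a placeholder, and it assumes the repository ships remote code that registers TRMForCausalLM.

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo = "your-org/trm-checkpoint"  # hypothetical repo id, not from this config

# Custom model types need trust_remote_code=True so transformers can import
# the repository's TRM config/model classes.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
print(config.trm_config_dict["halt_max_steps"])  # 8, per the config above

# pad_token_id 50283 and vocab_size 50368 match the ModernBERT tokenizer,
# consistent with pretrained_embeddings_model above.
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-modernbert-base")

model = AutoModelForCausalLM.from_pretrained(
    repo,
    trust_remote_code=True,
    dtype=torch.bfloat16,  # matches "dtype": "bfloat16" in the config
)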
|
|