{
  "model_type": "causal_lm",
  "base_model": "Qwen/Qwen3-8B",
  "task": "text-generation",
  "language": [
    "en"
  ],
  "tags": [
    "terminal",
    "agent",
    "multitask",
    "nat",
    "qwen"
  ],
  "datasets": [
    "terminal-agent-multitask-nat-v13"
  ],
  "model-index": [
    {
      "name": "Terminal Agent Multi-Task NAT v13",
      "results": [
        {
          "task": {
            "type": "text-generation"
          },
          "dataset": {
            "name": "Terminal Bench",
            "type": "terminal-bench"
          },
          "metrics": [
            {
              "name": "success_rate",
              "type": "accuracy",
              "value": "56%"
            }
          ]
        }
      ]
    }
  ]
}