atutej commited on
Commit
d7fbe0a
·
verified ·
1 Parent(s): f5737fd

Add parsed training metrics and plots

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260428_203121_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
training_logs/20260428_203121_metrics_job_389754.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,timing/save_hf_model
2
+ 0.0,0,64,512,0,0.0,0,0.0,3837.043,1844.4,3856.6943,11791,1000,1728.238,0.0098,-0.003,0.0108,-0.0,0.2114,-0.0,0.0,1.0,0.0,0.0146,0.0469,0.0098,11.0404,73.0374,334.9649,61.0,857.9687,523.0038,10.3772,0.0525,2.3302,54.8335,599.7188,654.8685,44.3151,2252.5445,55.9998,599.982,1539.3458,0,1,128.0,1024.0,1024.0,0.0,0.0,
3
+ 0.0,0,64,512,1,1.0,1,1.0,3734.7441,4189.0,3722.9098,14143,1008,1771.7086,0.0254,-0.003,0.0135,0.0,0.2119,0.0,0.0,1.0,0.0,0.0147,0.0781,0.0254,12.3731,73.238,331.4794,61.4,857.9687,526.4893,0.0156,0.0625,2.608,51.6906,593.7021,645.7138,33.1731,927.602,58.3788,593.9603,220.8803,0,2,64.0,512.0,512.0,0.0,0.0,
4
+ 0.0,0,64,512,2,2.0,2,1.0,3680.1211,3429.5,3685.1135,17798,960,1903.0569,0.0195,-0.0057,0.0404,0.0,0.2131,0.0,0.0,1.0,0.0,0.017,0.125,0.0195,15.006,73.3821,331.1569,61.4,857.9687,526.8118,0.0079,0.0712,3.338,54.9564,607.7339,663.0308,33.7102,1161.2,54.4484,608.0028,440.3575,0,3,64.0,512.0,512.0,0.0,0.0,
5
+ 0.0,0,64,512,3,3.0,3,1.0,4038.8242,3594.9412,4054.0687,19942,1070,2204.7849,0.0332,-0.0083,0.0398,0.0,0.1988,0.0,0.0,1.0,0.0,0.0159,0.1094,0.0332,18.2571,73.6598,332.0689,61.3,857.9687,525.8998,0.0123,0.0638,3.988,64.8817,648.7096,713.9267,33.5855,774.7256,56.7814,648.9808,0.0053,0,4,64.0,512.0,512.0,0.0,0.0,
6
+ 0.0,0,64,512,4,4.0,4,1.0,3734.0176,3721.08,3734.6817,13083,1049,1658.2439,0.0488,-0.0016,0.0331,0.0,0.2072,0.0,0.0,1.0,0.0,0.0298,0.125,0.0488,18.5352,73.6546,329.1295,61.6,857.9687,528.8392,0.0099,0.0428,2.9611,54.8169,588.7437,643.8552,33.7826,946.1626,58.0555,588.9951,241.2625,0,5,64.0,512.0,512.0,0.0,0.0,38.1036
7
+ 0.0,0,64,512,5,5.0,5,1.0,3678.4492,2779.8125,3707.4375,16575,982,1778.7688,0.0312,-0.0069,0.0393,0.0,0.2064,0.0,0.0,1.0,0.0,0.0194,0.1406,0.0312,18.5881,73.6669,327.9209,61.8,857.9687,530.0478,0.0087,0.0617,3.0487,49.8621,590.8918,641.066,34.0988,888.5801,61.7658,591.1417,182.6699,0,6,64.0,512.0,512.0,0.0,0.0,
8
+ 0.0,0,64,512,6,6.0,6,1.0,4026.377,2695.15,4080.4919,30105,860,2435.5002,0.0391,-0.0117,0.0579,0.0,0.2004,0.0,0.0,1.0,0.0,0.0242,0.2031,0.0391,24.2751,73.9734,398.3101,53.6,857.9687,459.6586,0.0095,0.0949,4.4248,61.0183,660.677,722.0682,35.0715,2094.5908,55.4597,660.9545,1312.6129,0,7,64.0,512.0,512.0,0.0,0.0,
9
+ 0.0,0,64,512,0,0.0,0,0.0,3169.8066,2304.1277,3257.3054,9841,950,1527.965,0.0918,-0.023,0.1174,0.0,0.2084,0.0,0.0,1.0,0.0,0.0364,0.3281,0.0918,24.1753,73.8303,401.2653,53.2,857.9687,456.7034,0.0112,0.0425,2.0132,44.7602,566.5425,611.7071,33.0222,1954.7684,56.9235,566.9038,1284.0936,1,8,64.0,512.0,512.0,0.0,0.0,
10
+ 0.0,0,64,512,1,1.0,1,1.0,3208.3555,2564.1489,3273.4688,19175,927,1685.1496,0.0918,-0.0201,0.1149,0.0,0.2081,0.0,0.0,1.0,0.0,0.0337,0.3281,0.0918,24.6965,74.2126,396.9511,53.7,857.9687,461.0176,0.0079,0.0446,3.5998,47.727,576.6595,624.6952,33.46,810.2478,57.0584,576.9233,124.8704,1,9,64.0,512.0,512.0,0.0,0.0,
11
+ 0.0,0,64,512,2,2.0,2,1.0,3237.8379,2406.9649,3341.9253,12501,890,1700.2375,0.1113,-0.0333,0.1439,0.0,0.2076,0.0,0.0,1.0,0.0,0.0374,0.3906,0.1113,24.8782,74.2612,391.3597,54.4,857.9687,466.609,0.0092,0.0533,2.4828,50.2219,593.8531,644.3998,33.4188,912.5225,56.7812,594.124,208.8362,1,10,64.0,512.0,512.0,0.0,0.0,37.5099
12
+ 0.0,0,64,512,3,3.0,3,1.0,3001.6465,2650.619,3050.8998,13406,941,1483.113,0.123,-0.0266,0.1658,0.0,0.199,0.0,0.0,1.0,0.0,0.0435,0.4219,0.123,25.0717,74.3157,387.2821,54.9,857.9687,470.6866,0.0168,0.0371,2.6113,41.7835,561.0683,603.1351,33.5831,731.0664,56.667,561.3141,68.6264,1,11,64.0,512.0,512.0,0.0,0.0,
13
+ 0.0,0,64,512,4,4.0,4,1.0,2883.377,2714.9545,2918.3325,14702,798,1601.3015,0.1719,-0.0285,0.1735,-0.0,0.1995,-0.0,0.0,1.0,0.0,0.0405,0.4219,0.1719,25.1736,74.6695,384.2253,55.2,857.9687,473.7434,0.011,0.0379,2.9524,47.2009,561.4127,608.9848,33.1065,931.2563,58.3024,561.7455,260.9926,1,12,64.0,512.0,512.0,0.0,0.0,
14
+ 0.0,0,64,512,5,5.0,5,1.0,2857.6934,2411.3333,2960.6995,13216,868,1685.3561,0.1875,-0.0066,0.1342,0.0,0.1896,0.0,0.0,1.0,0.0,0.0396,0.3906,0.1875,25.3254,74.5136,380.6907,55.6,857.9687,477.278,0.0093,0.0471,3.5522,43.8172,560.515,604.6342,33.2148,682.4333,57.7062,560.7695,16.5161,1,13,64.0,512.0,512.0,0.0,0.0,
15
+ 0.0,0,64,512,6,6.0,6,1.0,3206.502,2278.8974,3373.2143,15130,906,2031.0791,0.1523,-0.0278,0.1606,-0.0,0.1855,-0.0,0.0,1.0,0.0,0.0446,0.4688,0.1523,25.6977,74.7123,378.3962,55.9,857.9687,479.5725,0.0096,0.0577,2.5454,52.8053,611.3389,664.4901,33.0486,4631.2906,13.904,611.6265,3950.3283,1,14,,,,,,
training_logs/20260428_203121_metrics_report.md ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 1 log file
4
+
5
+ ## Overview
6
+
7
+ | Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | job_389754 | 14 | 14 | 0.0812 | 0.1875 | 19699.0 |
10
+
11
+ ## Async Metrics
12
+
13
+ | | Mean | Std | Min | Max | Count |
14
+ |:------------------------------|-----------:|---------:|------:|------:|--------:|
15
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
16
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
17
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
18
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
19
+ | async/staleness_max | 3 | 2.0755 | 0 | 6 | 14 |
20
+ | async/staleness_mean | 3 | 2.0755 | 0 | 6 | 14 |
21
+ | async/staleness_min | 3 | 2.0755 | 0 | 6 | 14 |
22
+ | async/staleness_ratio | 0.857143 | 0.363137 | 0 | 1 | 14 |
23
+
24
+ ## Generate Metrics
25
+
26
+ | | Mean | Std | Min | Max | Count |
27
+ |:-------------------------------------|---------:|----------:|--------:|---------:|--------:|
28
+ | generate/avg_num_tokens | 3449.63 | 412.285 | 2857.69 | 4038.82 | 14 |
29
+ | generate/avg_tokens_non_zero_rewards | 2827.49 | 657.972 | 1844.4 | 4189 | 14 |
30
+ | generate/avg_tokens_zero_rewards | 3501.23 | 385.844 | 2918.33 | 4080.49 | 14 |
31
+ | generate/max_num_tokens | 15814.9 | 4995.58 | 9841 | 30105 | 14 |
32
+ | generate/min_num_tokens | 943.5 | 75.7889 | 798 | 1070 | 14 |
33
+ | generate/std_num_tokens | 1799.61 | 264.344 | 1483.11 | 2435.5 | 14 |
34
+
35
+ ## Loss Metrics
36
+
37
+ | | Mean | Std | Min | Max | Count |
38
+ |:----------------------------|-----------:|----------:|--------:|--------:|--------:|
39
+ | loss/avg_final_rewards | 0.0811857 | 0.0601946 | 0.0098 | 0.1875 | 14 |
40
+ | loss/avg_raw_advantages | -0.0147214 | 0.0112676 | -0.0333 | -0.0016 | 14 |
41
+ | loss/avg_raw_advantages_abs | 0.0889357 | 0.0606754 | 0.0108 | 0.1735 | 14 |
42
+
43
+ ## Policy Metrics
44
+
45
+ | | Mean | Std | Min | Max | Count |
46
+ |:---------------------------|----------:|-----------:|--------:|-------:|--------:|
47
+ | policy/final_loss | 0 | 0 | -0 | 0 | 14 |
48
+ | policy/policy_entropy | 0.20335 | 0.00829298 | 0.1855 | 0.2131 | 14 |
49
+ | policy/policy_loss | 0 | 0 | -0 | 0 | 14 |
50
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
51
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
52
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
53
+ | policy/raw_grad_norm | 0.0293786 | 0.0113914 | 0.0146 | 0.0446 | 14 |
54
+
55
+ ## Reward Metrics
56
+
57
+ | | Mean | Std | Min | Max | Count |
58
+ |:----------------------|----------:|----------:|-------:|-------:|--------:|
59
+ | reward/avg_pass_at_8 | 0.255579 | 0.15045 | 0.0469 | 0.4688 | 14 |
60
+ | reward/avg_raw_reward | 0.0811857 | 0.0601946 | 0.0098 | 0.1875 | 14 |
61
+
62
+ ## System Metrics
63
+
64
+ | | Mean | Std | Min | Max | Count |
65
+ |:------------------------|---------:|-------------:|---------:|---------:|--------:|
66
+ | system/process_rss_gb | 20.9352 | 5.21234 | 11.0404 | 25.6977 | 14 |
67
+ | system/process_vms_gb | 73.9377 | 0.529341 | 73.0374 | 74.7123 | 14 |
68
+ | system/ram_available_gb | 364.657 | 30.8191 | 327.921 | 401.265 | 14 |
69
+ | system/ram_percent | 57.5 | 3.59893 | 53.2 | 61.8 | 14 |
70
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
71
+ | system/ram_used_gb | 493.311 | 30.8191 | 456.703 | 530.048 | 14 |
72
+
73
+ ## Timing Metrics
74
+
75
+ | | Mean | Std | Min | Max | Count |
76
+ |:--------------------------------------|-------------:|-------------:|---------:|----------:|--------:|
77
+ | timing/cleanup_old_checkpoints | 0.75115 | 2.77057 | 0.0079 | 10.3772 | 14 |
78
+ | timing/compute_advantages_and_returns | 0.0549714 | 0.0155364 | 0.0371 | 0.0949 | 14 |
79
+ | timing/convert_to_training_input | 3.03256 | 0.678105 | 2.0132 | 4.4248 | 14 |
80
+ | timing/fwd_logprobs_values_reward | 51.4554 | 6.453 | 41.7835 | 64.8817 | 14 |
81
+ | timing/policy_train | 594.398 | 30.7826 | 560.515 | 660.677 | 14 |
82
+ | timing/run_training | 646.184 | 36.8893 | 603.135 | 722.068 | 14 |
83
+ | timing/save_checkpoints | 34.3279 | 2.92364 | 33.0222 | 44.3151 | 14 |
84
+ | timing/step | 1407.07 | 1069.28 | 682.433 | 4631.29 | 14 |
85
+ | timing/sync_weights | 54.1594 | 11.7091 | 13.904 | 61.7658 | 14 |
86
+ | timing/train_critic_and_policy | 594.673 | 30.7772 | 560.769 | 660.955 | 14 |
87
+ | timing/wait_for_generation_buffer | 703.671 | 1069.9 | 0.0053 | 3950.33 | 14 |
88
+ | timing/save_hf_model | 37.8068 | 0.419809 | 37.5099 | 38.1036 | 2 |
89
+
90
+ ## Trainer Metrics
91
+
92
+ | | Mean | Std | Min | Max | Count |
93
+ |:--------------------|-------:|---------:|------:|------:|--------:|
94
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
95
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
96
+
97
+ ## Batch_Errors Metrics
98
+
99
+ | | Mean | Std | Min | Max | Count |
100
+ |:------------------------------|---------:|---------:|------:|------:|--------:|
101
+ | batch_errors/total_batches | 68.9231 | 17.7504 | 64 | 128 | 13 |
102
+ | batch_errors/total_instances | 551.385 | 142.003 | 512 | 1024 | 13 |
103
+ | batch_errors/total_successful | 551.385 | 142.003 | 512 | 1024 | 13 |
104
+ | batch_errors/total_failed | 0 | 0 | 0 | 0 | 13 |
105
+ | batch_errors/total_masked | 0 | 0 | 0 | 0 | 13 |
106
+
107
+ ## Training Progression by Log
108
+
109
+ ### job_389754
110
+
111
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
112
+ |------|--------|--------|-----|------|---------------|-------------|
113
+ | 1 | 0.0098 | 0.0469 | 0.000000 | -0.0000 | 2252.5 | 1539.3 |
114
+ | 2 | 0.0254 | 0.0781 | 0.000000 | 0.0000 | 927.6 | 220.9 |
115
+ | 3 | 0.0195 | 0.1250 | 0.000000 | 0.0000 | 1161.2 | 440.4 |
116
+ | 4 | 0.0332 | 0.1094 | 0.000000 | 0.0000 | 774.7 | 0.0 |
117
+ | 5 | 0.0488 | 0.1250 | 0.000000 | 0.0000 | 946.2 | 241.3 |
118
+ | 6 | 0.0312 | 0.1406 | 0.000000 | 0.0000 | 888.6 | 182.7 |
119
+ | 7 | 0.0391 | 0.2031 | 0.000000 | 0.0000 | 2094.6 | 1312.6 |
120
+ | 8 | 0.0918 | 0.3281 | 0.000000 | 0.0000 | 1954.8 | 1284.1 |
121
+ | 9 | 0.0918 | 0.3281 | 0.000000 | 0.0000 | 810.2 | 124.9 |
122
+ | 10 | 0.1113 | 0.3906 | 0.000000 | 0.0000 | 912.5 | 208.8 |
123
+ | 11 | 0.1230 | 0.4219 | 0.000000 | 0.0000 | 731.1 | 68.6 |
124
+ | 12 | 0.1719 | 0.4219 | 0.000000 | -0.0000 | 931.3 | 261.0 |
125
+ | 13 | 0.1875 | 0.3906 | 0.000000 | 0.0000 | 682.4 | 16.5 |
126
+ | 14 | 0.1523 | 0.4688 | 0.000000 | -0.0000 | 4631.3 | 3950.3 |
127
+
128
+ ## Timing Analysis
129
+
130
+ ### Average Time Breakdown (% of step time)
131
+
132
+ | Component | Avg % of Step Time |
133
+ |-----------|-------------------|
134
+ | run_training | 60.9% |
135
+ | train_critic_and_policy | 56.1% |
136
+ | policy_train | 56.0% |
137
+ | wait_for_generation_buffer | 33.4% |
138
+ | sync_weights | 5.4% |
139
+ | fwd_logprobs_values_reward | 4.8% |
140
+ | save_hf_model | 4.1% |
141
+ | save_checkpoints | 3.2% |
142
+ | convert_to_training_input | 0.3% |
143
+ | cleanup_old_checkpoints | 0.0% |
144
+ | compute_advantages_and_returns | 0.0% |
145
+
146
+ ## vLLM Inference Engine Analysis
147
+
148
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
149
+
150
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
151
+ > so we typically capture stats from one engine per timestamp. The stats shown are
152
+ > **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
153
+
154
+ ### Summary by Log (Per-Engine Stats)
155
+
156
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
157
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
158
+ | job_389754 | 3.5 | 0.0 | 66.8 tok/s | 6.4% | 83.3% |
159
+
160
+ ### Utilization Analysis (Per-Engine)
161
+
162
+ Key indicators of inference engine utilization:
163
+
164
+ - **Running requests/engine**: Concurrent requests being processed by each engine
165
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
166
+ - **Generation throughput**: Decode tokens/sec per engine
167
+ - 8B model on H100 can do **1000+ tok/s** when saturated
168
+ - If seeing <300 tok/s with 0 waiting, engine is **starved for requests**
169
+
170
+ #### job_389754
171
+
172
+ - **Running requests/engine**: avg=3.5, max=13
173
+ - **Waiting requests**: avg=0.0, max=0
174
+ - **Generation throughput/engine**: avg=66.8 tok/s, max=298.0 tok/s
175
+ - **KV cache usage**: avg=6.4%
176
+ - **Prefix cache hit rate**: avg=83.3%
177
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
178
+ - Bottleneck is likely upstream (environment execution, not inference)
179
+
180
+ ## Trial-Level Analysis (from result.json)
181
+
182
+ Total trials parsed: 7168
183
+
184
+ ### Turn Count Statistics
185
+
186
+ | Metric | Value |
187
+ |--------|-------|
188
+ | Mean | 2.4 |
189
+ | Median | 2.0 |
190
+ | Std | 0.7 |
191
+ | Min | 2 |
192
+ | Max | 20 |
193
+ | Count | 7168 |
194
+
195
+ ### Exception Distribution
196
+
197
+ | Exception Type | Count | % |
198
+ |---------------|-------|---|
199
+ | No exception | 7162 | 99.9% |
200
+ | AgentTimeoutError | 5 | 0.1% |
201
+ | ContextLengthExceededError | 1 | 0.0% |
202
+
203
+ ### Turn Count by Exception Type
204
+
205
+ | Exception Type | Mean Turns | Median Turns | Count |
206
+ |---------------|-----------|-------------|-------|
207
+ | ContextLengthExceededError | 20.0 | 20.0 | 1 |
208
+ | AgentTimeoutError | 10.2 | 10.0 | 5 |
209
+ | No exception | 2.4 | 2.0 | 7162 |
210
+
211
+ ### Turn Count by Outcome
212
+
213
+ | Outcome | Mean Turns | Median Turns | Count |
214
+ |---------|-----------|-------------|-------|
215
+ | Success | 2.3 | 2.0 | 582 |
216
+ | Failure | 2.5 | 2.0 | 6586 |
217
+
218
+ ### Reward Summary
219
+
220
+ - Mean reward: 0.0812
221
+ - Success rate: 8.1%
222
+ - Trials with reward data: 7168
223
+
training_logs/20260428_203121_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,timing/save_hf_model,global_step
2
+ job_389754,0.0,0,64,512,0,0.0,0,0.0,3837.043,1844.4,3856.6943,11791,1000,1728.238,0.0098,-0.003,0.0108,-0.0,0.2114,-0.0,0.0,1.0,0.0,0.0146,0.0469,0.0098,11.0404,73.0374,334.9649,61.0,857.9687,523.0038,10.3772,0.0525,2.3302,54.8335,599.7188,654.8685,44.3151,2252.5445,55.9998,599.982,1539.3458,0,1,128.0,1024.0,1024.0,0.0,0.0,,1
3
+ job_389754,0.0,0,64,512,1,1.0,1,1.0,3734.7441,4189.0,3722.9098,14143,1008,1771.7086,0.0254,-0.003,0.0135,0.0,0.2119,0.0,0.0,1.0,0.0,0.0147,0.0781,0.0254,12.3731,73.238,331.4794,61.4,857.9687,526.4893,0.0156,0.0625,2.608,51.6906,593.7021,645.7138,33.1731,927.602,58.3788,593.9603,220.8803,0,2,64.0,512.0,512.0,0.0,0.0,,2
4
+ job_389754,0.0,0,64,512,2,2.0,2,1.0,3680.1211,3429.5,3685.1135,17798,960,1903.0569,0.0195,-0.0057,0.0404,0.0,0.2131,0.0,0.0,1.0,0.0,0.017,0.125,0.0195,15.006,73.3821,331.1569,61.4,857.9687,526.8118,0.0079,0.0712,3.338,54.9564,607.7339,663.0308,33.7102,1161.2,54.4484,608.0028,440.3575,0,3,64.0,512.0,512.0,0.0,0.0,,3
5
+ job_389754,0.0,0,64,512,3,3.0,3,1.0,4038.8242,3594.9412,4054.0687,19942,1070,2204.7849,0.0332,-0.0083,0.0398,0.0,0.1988,0.0,0.0,1.0,0.0,0.0159,0.1094,0.0332,18.2571,73.6598,332.0689,61.3,857.9687,525.8998,0.0123,0.0638,3.988,64.8817,648.7096,713.9267,33.5855,774.7256,56.7814,648.9808,0.0053,0,4,64.0,512.0,512.0,0.0,0.0,,4
6
+ job_389754,0.0,0,64,512,4,4.0,4,1.0,3734.0176,3721.08,3734.6817,13083,1049,1658.2439,0.0488,-0.0016,0.0331,0.0,0.2072,0.0,0.0,1.0,0.0,0.0298,0.125,0.0488,18.5352,73.6546,329.1295,61.6,857.9687,528.8392,0.0099,0.0428,2.9611,54.8169,588.7437,643.8552,33.7826,946.1626,58.0555,588.9951,241.2625,0,5,64.0,512.0,512.0,0.0,0.0,38.1036,5
7
+ job_389754,0.0,0,64,512,5,5.0,5,1.0,3678.4492,2779.8125,3707.4375,16575,982,1778.7688,0.0312,-0.0069,0.0393,0.0,0.2064,0.0,0.0,1.0,0.0,0.0194,0.1406,0.0312,18.5881,73.6669,327.9209,61.8,857.9687,530.0478,0.0087,0.0617,3.0487,49.8621,590.8918,641.066,34.0988,888.5801,61.7658,591.1417,182.6699,0,6,64.0,512.0,512.0,0.0,0.0,,6
8
+ job_389754,0.0,0,64,512,6,6.0,6,1.0,4026.377,2695.15,4080.4919,30105,860,2435.5002,0.0391,-0.0117,0.0579,0.0,0.2004,0.0,0.0,1.0,0.0,0.0242,0.2031,0.0391,24.2751,73.9734,398.3101,53.6,857.9687,459.6586,0.0095,0.0949,4.4248,61.0183,660.677,722.0682,35.0715,2094.5908,55.4597,660.9545,1312.6129,0,7,64.0,512.0,512.0,0.0,0.0,,7
9
+ job_389754,0.0,0,64,512,0,0.0,0,0.0,3169.8066,2304.1277,3257.3054,9841,950,1527.965,0.0918,-0.023,0.1174,0.0,0.2084,0.0,0.0,1.0,0.0,0.0364,0.3281,0.0918,24.1753,73.8303,401.2653,53.2,857.9687,456.7034,0.0112,0.0425,2.0132,44.7602,566.5425,611.7071,33.0222,1954.7684,56.9235,566.9038,1284.0936,1,8,64.0,512.0,512.0,0.0,0.0,,8
10
+ job_389754,0.0,0,64,512,1,1.0,1,1.0,3208.3555,2564.1489,3273.4688,19175,927,1685.1496,0.0918,-0.0201,0.1149,0.0,0.2081,0.0,0.0,1.0,0.0,0.0337,0.3281,0.0918,24.6965,74.2126,396.9511,53.7,857.9687,461.0176,0.0079,0.0446,3.5998,47.727,576.6595,624.6952,33.46,810.2478,57.0584,576.9233,124.8704,1,9,64.0,512.0,512.0,0.0,0.0,,9
11
+ job_389754,0.0,0,64,512,2,2.0,2,1.0,3237.8379,2406.9649,3341.9253,12501,890,1700.2375,0.1113,-0.0333,0.1439,0.0,0.2076,0.0,0.0,1.0,0.0,0.0374,0.3906,0.1113,24.8782,74.2612,391.3597,54.4,857.9687,466.609,0.0092,0.0533,2.4828,50.2219,593.8531,644.3998,33.4188,912.5225,56.7812,594.124,208.8362,1,10,64.0,512.0,512.0,0.0,0.0,37.5099,10
12
+ job_389754,0.0,0,64,512,3,3.0,3,1.0,3001.6465,2650.619,3050.8998,13406,941,1483.113,0.123,-0.0266,0.1658,0.0,0.199,0.0,0.0,1.0,0.0,0.0435,0.4219,0.123,25.0717,74.3157,387.2821,54.9,857.9687,470.6866,0.0168,0.0371,2.6113,41.7835,561.0683,603.1351,33.5831,731.0664,56.667,561.3141,68.6264,1,11,64.0,512.0,512.0,0.0,0.0,,11
13
+ job_389754,0.0,0,64,512,4,4.0,4,1.0,2883.377,2714.9545,2918.3325,14702,798,1601.3015,0.1719,-0.0285,0.1735,-0.0,0.1995,-0.0,0.0,1.0,0.0,0.0405,0.4219,0.1719,25.1736,74.6695,384.2253,55.2,857.9687,473.7434,0.011,0.0379,2.9524,47.2009,561.4127,608.9848,33.1065,931.2563,58.3024,561.7455,260.9926,1,12,64.0,512.0,512.0,0.0,0.0,,12
14
+ job_389754,0.0,0,64,512,5,5.0,5,1.0,2857.6934,2411.3333,2960.6995,13216,868,1685.3561,0.1875,-0.0066,0.1342,0.0,0.1896,0.0,0.0,1.0,0.0,0.0396,0.3906,0.1875,25.3254,74.5136,380.6907,55.6,857.9687,477.278,0.0093,0.0471,3.5522,43.8172,560.515,604.6342,33.2148,682.4333,57.7062,560.7695,16.5161,1,13,64.0,512.0,512.0,0.0,0.0,,13
15
+ job_389754,0.0,0,64,512,6,6.0,6,1.0,3206.502,2278.8974,3373.2143,15130,906,2031.0791,0.1523,-0.0278,0.1606,-0.0,0.1855,-0.0,0.0,1.0,0.0,0.0446,0.4688,0.1523,25.6977,74.7123,378.3962,55.9,857.9687,479.5725,0.0096,0.0577,2.5454,52.8053,611.3389,664.4901,33.0486,4631.2906,13.904,611.6265,3950.3283,1,14,,,,,,,14
training_logs/20260428_203121_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: 24f2c863ba542c4aab41abd874143bae96b71659216e348fdb0397286af86b2c
  • Pointer size: 131 Bytes
  • Size of remote file: 129 kB
training_logs/20260428_203121_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_203121_turn_count_distribution.png ADDED
training_logs/20260428_203121_vllm_metrics_job_389754.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_203121_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff