| eval-result:bfcl.gpt-5.001 | eval-result:bfcl.gpt-5.001 | benchmarks |
| eval-result:evalplus.gpt-5.001 | eval-result:evalplus.gpt-5.001 | benchmarks |
| eval-result:gpqa-diamond.gpt-5.001 | eval-result:gpqa-diamond.gpt-5.001 | benchmarks |
| eval-result:gpqa.gpt-5.001 | eval-result:gpqa.gpt-5.001 | benchmarks |
| eval-result:human-eval-plus.gpt-5.001 | eval-result:human-eval-plus.gpt-5.001 | benchmarks |
| eval-result:human-eval.gpt-5.001 | eval-result:human-eval.gpt-5.001 | benchmarks |
| eval-result:livecodebench.gpt-5.001 | eval-result:livecodebench.gpt-5.001 | benchmarks |
| eval-result:math.gpt-5.001 | eval-result:math.gpt-5.001 | benchmarks |
| eval-result:swe-bench-verified.gpt-5.001 | eval-result:swe-bench-verified.gpt-5.001 | benchmarks |
| eval-result:swe-bench-verified.gpt-5.headline | eval-result:swe-bench-verified.gpt-5.headline | benchmarks |
| eval-result:swe-bench-verified.gpt-5.headline.001 | eval-result:swe-bench-verified.gpt-5.headline.001 | benchmarks |