| eval-run:gpqa-diamond.gpt-5-4-mini.2026-03-17 | eval-run:gpqa-diamond.gpt-5-4-mini.2026-03-17 | benchmarks |
| eval-run:gpqa-diamond.gpt-5-4.2026-03-17 | eval-run:gpqa-diamond.gpt-5-4.2026-03-17 | benchmarks |
| eval-run:gpqa-diamond.gpt-5.2025-08 | eval-run:gpqa-diamond.gpt-5.2025-08 | benchmarks |
| eval-run:gpqa.gpt-5.2025-08 | eval-run:gpqa.gpt-5.2025-08 | benchmarks |
| eval-run:human-eval.gpt-5.2025-08 | eval-run:human-eval.gpt-5.2025-08 | benchmarks |
| eval-run:livecodebench.gpt-5.2025-08 | eval-run:livecodebench.gpt-5.2025-08 | benchmarks |
| eval-run:math.gpt-5.2025-08 | eval-run:math.gpt-5.2025-08 | benchmarks |
| eval-run:math.o3.2025-04 | eval-run:math.o3.2025-04 | benchmarks |
| eval-run:mmlu.o1.2024-12 | eval-run:mmlu.o1.2024-12 | benchmarks |
| eval-run:swe-bench-verified.gpt-5.2025-08 | eval-run:swe-bench-verified.gpt-5.2025-08 | benchmarks |
| eval-run:swe-bench-verified.o3.2025-04 | eval-run:swe-bench-verified.o3.2025-04 | benchmarks |