II.
Benchmark JSON
Structured · benchmark:truthful-qa
TruthfulQA JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "benchmark:truthful-qa",
"_kind": "Benchmark",
"_file": "benchmarks/benchmarks/benchmarks-knowledge.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "TruthfulQA",
"homepageUrl": "https://github.com/sylinrl/TruthfulQA",
"kind": "model-only",
"targetsKind": "ModelVersion",
"description": "TruthfulQA (Lin et al., 2021) is an 817-question benchmark\nmeasuring whether a language model produces truthful answers\nwhen humans tend to hold false beliefs (misconceptions, urban\nlegends). Reports both truthfulness and informativeness scores.\n"
},
"outgoingEdges": [],
"incomingEdges": [
{
"from": "eval-result:truthful-qa.claude-opus-4-5.001",
"to": "benchmark:truthful-qa",
"kind": "scored_against",
"attributes": {}
},
{
"from": "eval-run:truthful-qa.claude-opus-4-5.2025-09",
"to": "benchmark:truthful-qa",
"kind": "for_benchmark",
"attributes": {}
}
]
}