Agentic AI Atlas

II.

TestSet JSON

test-set:flores-200-devtest

Structured · live

FLORES-200 devtest JSON

Inspect the normalized record payload exactly as the atlas UI reads it.

File · benchmarks/test-sets/test-sets.yaml | Cluster · benchmarks

Record JSON

{
  "id": "test-set:flores-200-devtest",
  "_kind": "TestSet",
  "_file": "benchmarks/test-sets/test-sets.yaml",
  "_cluster": "benchmarks",
  "attributes": {
    "displayName": "FLORES-200 devtest",
    "benchmarkId": "benchmark:flores-200",
    "caseCount": 1012,
    "releasedAt": "2022-07-06",
    "composition": "The FLORES-200 devtest split: 1,012 sentences professionally\ntranslated from English into 200 languages (and pivots), used\nas the canonical held-out evaluation set for many-to-many\nmachine-translation systems.\n",
    "homepageUrl": "https://github.com/facebookresearch/flores",
    "description": "The dev/devtest splits are the canonical held-out evaluation\nsets for FLORES-200; vendors typically report spBLEU / chrF on\ndevtest.\n"
  },
  "outgoingEdges": [],
  "incomingEdges": [
    {
      "from": "benchmark:flores-200",
      "to": "test-set:flores-200-devtest",
      "kind": "uses_test_set",
      "attributes": {}
    }
  ]
}