Agentic AI Atlas

II.

ModelTransportProtocol JSON

model-transport:vertex-anthropic-messages

Structured · live

Vertex AI — Anthropic Messages json

Inspect the normalized record payload exactly as the atlas UI reads it.

File · compute/model-transport-protocols/vertex-anthropic-messages.yaml
Cluster · compute

Record JSON

{
  "id": "model-transport:vertex-anthropic-messages",
  "_kind": "ModelTransportProtocol",
  "_file": "compute/model-transport-protocols/vertex-anthropic-messages.yaml",
  "_cluster": "compute",
  "attributes": {
    "displayName": "Vertex AI — Anthropic Messages",
    "vendor": "Google Cloud + Anthropic",
    "specUrl": "https://docs.anthropic.com/en/api/claude-on-vertex-ai",
    "streamingFraming": "sse",
    "toolUseSchema": "Wire-compatible with Anthropic Messages: tool calls appear as\n`tool_use` content blocks (with `id`, `name`, `input`); tool\nresults are returned as `tool_result` blocks referencing the same\n`id`.\n",
    "thinkingChannel": "content-block",
    "cacheControl": "explicit",
    "firstSpecVersion": "2024-03-19",
    "currentSpecVersion": "vertex-2023-10-16",
    "status": "standard",
    "requestBodyShape": "POST https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/anthropic/models/{MODEL}:rawPredict\nPOST https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/anthropic/models/{MODEL}:streamRawPredict\nHeaders:\n  - `Authorization: Bearer <gcloud-oauth-token>`\n  - `Content-Type: application/json`\nBody — Anthropic Messages request shape with these adjustments:\n  - `anthropic_version` (required, e.g. \"vertex-2023-10-16\") instead\n    of the `anthropic-version` header\n  - no top-level `model` field (it is supplied via the URL path)\n  - all other fields (`messages`, `system`, `max_tokens`, `tools`,\n    `tool_choice`, `temperature`, `top_p`, `top_k`, `stop_sequences`,\n    `stream`, `thinking`, `metadata`) match Anthropic Messages\n",
    "responseBodyShape": "Non-streaming response (HTTP 200 `application/json`): the full\nAnthropic Messages response payload —\n  {\n    \"id\": \"msg_...\",\n    \"type\": \"message\",\n    \"role\": \"assistant\",\n    \"model\": \"...\",\n    \"content\": [ ContentBlock, ... ],\n    \"stop_reason\": \"end_turn\"|\"max_tokens\"|\"stop_sequence\"|\"tool_use\",\n    \"stop_sequence\": null|string,\n    \"usage\": { \"input_tokens\": int, \"output_tokens\": int,\n               \"cache_creation_input_tokens\"?: int,\n               \"cache_read_input_tokens\"?: int }\n  }\nStreaming (`:streamRawPredict`): SSE response framed identically to\nAnthropic Messages streaming.\n",
    "streamingEventTypes": [
      "message_start",
      "content_block_start",
      "content_block_delta",
      "content_block_stop",
      "message_delta",
      "message_stop",
      "ping",
      "error"
    ],
    "toolCallWireFormat": "A `tool_use` content block in `message.content`:\n  { \"type\": \"tool_use\", \"id\": \"toolu_...\", \"name\": \"<tool_name>\", \"input\": { ... } }\n",
    "toolResultWireFormat": "A `tool_result` content block in a subsequent user-role message:\n  { \"type\": \"tool_result\",\n    \"tool_use_id\": \"toolu_...\",\n    \"content\": string | ContentBlock[],\n    \"is_error\"?: bool }\n",
    "errorEnvelope": "Non-2xx response, `application/json` — Google API error envelope:\n  { \"error\": { \"code\": int,\n               \"message\": string,\n               \"status\": \"INVALID_ARGUMENT\"|\"PERMISSION_DENIED\"|\"RESOURCE_EXHAUSTED\"|\"NOT_FOUND\"|\"INTERNAL\"|\"UNAVAILABLE\",\n               \"details\": [ ... ] } }\nHTTP status mirrors `error.code`.\n",
    "cacheControlWireFormat": "Per-content-block annotation, identical to Anthropic Messages:\n  { \"type\": \"text\", \"text\": \"...\", \"cache_control\": { \"type\": \"ephemeral\", \"ttl\"?: \"5m\"|\"1h\" } }\nUp to four cache breakpoints per request. Cache hit accounting is\nreported as `usage.cache_creation_input_tokens` and\n`usage.cache_read_input_tokens`.\nNote: confirm availability of the optional `1h` TTL\non Vertex (rolled out after the 5m default).\n",
    "rateLimitSignaling": "On HTTP 429 `RESOURCE_EXHAUSTED`: GCP-standard quota error;\n`retry-after` header may be returned. Quota state is observable via\nGCP quotas / Cloud Monitoring rather than per-response headers.\n",
    "reasoningWireFormat": "Identical to Anthropic Messages — `thinking` and\n`redacted_thinking` content blocks on the response message:\n  { \"type\": \"thinking\", \"thinking\": \"<text>\", \"signature\": \"<opaque>\" }\n  { \"type\": \"redacted_thinking\", \"data\": \"<opaque>\" }\nBoth must be echoed back verbatim on multi-turn tool-use loops or\nextended-thinking continuations.\n",
    "authHeaderFormat": "GCP OAuth bearer token, sourced from a Google service account or\n`gcloud auth application-default print-access-token`:\n  `Authorization: Bearer <gcloud-oauth-token>`\nProject / region selection is encoded in the URL path, not\nheaders. No `x-api-key` header.\n",
    "versioningHeader": "No `anthropic-version` request header. Protocol version is\ndeclared in-body as `anthropic_version: \"vertex-2023-10-16\"`.\nOptional `anthropic_beta` opt-in features are passed in-body as\n`anthropic_beta: [\"<feature>\", ...]`.\nNote: verify the\nin-body field name (`anthropic_beta` vs `anthropic-beta`) against\ncurrent Vertex docs.\n"
  },
  "outgoingEdges": [
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "layer:3-transport",
      "kind": "realizes",
      "attributes": {}
    },
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "provider:gcp-vertex",
      "kind": "served_by"
    },
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "model:claude-opus-4-7@current",
      "kind": "spoken_by"
    }
  ],
  "incomingEdges": []
}

Vertex AI — Anthropic Messages json

Inspect the normalized record payload exactly as the atlas UI reads it.

File · compute/model-transport-protocols/vertex-anthropic-messages.yaml
Cluster · compute

Record JSON

{
  "id": "model-transport:vertex-anthropic-messages",
  "_kind": "ModelTransportProtocol",
  "_file": "compute/model-transport-protocols/vertex-anthropic-messages.yaml",
  "_cluster": "compute",
  "attributes": {
    "displayName": "Vertex AI — Anthropic Messages",
    "vendor": "Google Cloud + Anthropic",
    "specUrl": "https://docs.anthropic.com/en/api/claude-on-vertex-ai",
    "streamingFraming": "sse",
    "toolUseSchema": "Wire-compatible with Anthropic Messages: tool calls appear as\n`tool_use` content blocks (with `id`, `name`, `input`); tool\nresults are returned as `tool_result` blocks referencing the same\n`id`.\n",
    "thinkingChannel": "content-block",
    "cacheControl": "explicit",
    "firstSpecVersion": "2024-03-19",
    "currentSpecVersion": "vertex-2023-10-16",
    "status": "standard",
    "requestBodyShape": "POST https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/anthropic/models/{MODEL}:rawPredict\nPOST https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/anthropic/models/{MODEL}:streamRawPredict\nHeaders:\n  - `Authorization: Bearer <gcloud-oauth-token>`\n  - `Content-Type: application/json`\nBody — Anthropic Messages request shape with these adjustments:\n  - `anthropic_version` (required, e.g. \"vertex-2023-10-16\") instead\n    of the `anthropic-version` header\n  - no top-level `model` field (it is supplied via the URL path)\n  - all other fields (`messages`, `system`, `max_tokens`, `tools`,\n    `tool_choice`, `temperature`, `top_p`, `top_k`, `stop_sequences`,\n    `stream`, `thinking`, `metadata`) match Anthropic Messages\n",
    "responseBodyShape": "Non-streaming response (HTTP 200 `application/json`): the full\nAnthropic Messages response payload —\n  {\n    \"id\": \"msg_...\",\n    \"type\": \"message\",\n    \"role\": \"assistant\",\n    \"model\": \"...\",\n    \"content\": [ ContentBlock, ... ],\n    \"stop_reason\": \"end_turn\"|\"max_tokens\"|\"stop_sequence\"|\"tool_use\",\n    \"stop_sequence\": null|string,\n    \"usage\": { \"input_tokens\": int, \"output_tokens\": int,\n               \"cache_creation_input_tokens\"?: int,\n               \"cache_read_input_tokens\"?: int }\n  }\nStreaming (`:streamRawPredict`): SSE response framed identically to\nAnthropic Messages streaming.\n",
    "streamingEventTypes": [
      "message_start",
      "content_block_start",
      "content_block_delta",
      "content_block_stop",
      "message_delta",
      "message_stop",
      "ping",
      "error"
    ],
    "toolCallWireFormat": "A `tool_use` content block in `message.content`:\n  { \"type\": \"tool_use\", \"id\": \"toolu_...\", \"name\": \"<tool_name>\", \"input\": { ... } }\n",
    "toolResultWireFormat": "A `tool_result` content block in a subsequent user-role message:\n  { \"type\": \"tool_result\",\n    \"tool_use_id\": \"toolu_...\",\n    \"content\": string | ContentBlock[],\n    \"is_error\"?: bool }\n",
    "errorEnvelope": "Non-2xx response, `application/json` — Google API error envelope:\n  { \"error\": { \"code\": int,\n               \"message\": string,\n               \"status\": \"INVALID_ARGUMENT\"|\"PERMISSION_DENIED\"|\"RESOURCE_EXHAUSTED\"|\"NOT_FOUND\"|\"INTERNAL\"|\"UNAVAILABLE\",\n               \"details\": [ ... ] } }\nHTTP status mirrors `error.code`.\n",
    "cacheControlWireFormat": "Per-content-block annotation, identical to Anthropic Messages:\n  { \"type\": \"text\", \"text\": \"...\", \"cache_control\": { \"type\": \"ephemeral\", \"ttl\"?: \"5m\"|\"1h\" } }\nUp to four cache breakpoints per request. Cache hit accounting is\nreported as `usage.cache_creation_input_tokens` and\n`usage.cache_read_input_tokens`.\nNote: confirm availability of the optional `1h` TTL\non Vertex (rolled out after the 5m default).\n",
    "rateLimitSignaling": "On HTTP 429 `RESOURCE_EXHAUSTED`: GCP-standard quota error;\n`retry-after` header may be returned. Quota state is observable via\nGCP quotas / Cloud Monitoring rather than per-response headers.\n",
    "reasoningWireFormat": "Identical to Anthropic Messages — `thinking` and\n`redacted_thinking` content blocks on the response message:\n  { \"type\": \"thinking\", \"thinking\": \"<text>\", \"signature\": \"<opaque>\" }\n  { \"type\": \"redacted_thinking\", \"data\": \"<opaque>\" }\nBoth must be echoed back verbatim on multi-turn tool-use loops or\nextended-thinking continuations.\n",
    "authHeaderFormat": "GCP OAuth bearer token, sourced from a Google service account or\n`gcloud auth application-default print-access-token`:\n  `Authorization: Bearer <gcloud-oauth-token>`\nProject / region selection is encoded in the URL path, not\nheaders. No `x-api-key` header.\n",
    "versioningHeader": "No `anthropic-version` request header. Protocol version is\ndeclared in-body as `anthropic_version: \"vertex-2023-10-16\"`.\nOptional `anthropic_beta` opt-in features are passed in-body as\n`anthropic_beta: [\"<feature>\", ...]`.\nNote: verify the\nin-body field name (`anthropic_beta` vs `anthropic-beta`) against\ncurrent Vertex docs.\n"
  },
  "outgoingEdges": [
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "layer:3-transport",
      "kind": "realizes",
      "attributes": {}
    },
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "provider:gcp-vertex",
      "kind": "served_by"
    },
    {
      "from": "model-transport:vertex-anthropic-messages",
      "to": "model:claude-opus-4-7@current",
      "kind": "spoken_by"
    }
  ],
  "incomingEdges": []
}