II.
StackProfile JSON
Structured · live · stack-profile:voice-ai-agent
Voice AI Agent Stack (Whisper, TTS, WebSocket, FastAPI, React)
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "stack-profile:voice-ai-agent",
"_kind": "StackProfile",
"_file": "domain/stack-profiles/deep-stacks-1.yaml",
"_cluster": "domain",
"attributes": {
"displayName": "Voice AI Agent Stack (Whisper, TTS, WebSocket, FastAPI, React)",
"description": "An end-to-end voice-powered AI agent architecture for building\nconversational interfaces with speech input and output. OpenAI Whisper\n(or whisper.cpp) handles automatic speech recognition, converting audio\nstreams to text. A text-to-speech engine synthesizes agent responses\nback to audio. WebSocket connections enable full-duplex, low-latency\naudio streaming between client and server.\n\nFastAPI serves as the async backend, coordinating ASR, LLM inference,\nand TTS in a streaming pipeline. React powers the frontend with audio\ncapture, playback, and visual feedback. Python handles all server-side\nlogic including audio preprocessing and LLM integration. This stack\nsuits voice assistants, call center copilots, and accessibility-first\napplications. The main tradeoff is latency — the ASR-to-TTS round trip\nmust stay under 1-2 seconds for natural conversation flow.\n",
"composes": [
"framework:fastapi",
"framework:react",
"language:python",
"language:typescript",
"library:uvicorn",
"library:websockets",
"tool:docker"
]
},
"outgoingEdges": [
{
"from": "stack-profile:voice-ai-agent",
"to": "framework:fastapi",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "framework:react",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "language:python",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "language:typescript",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "library:websockets",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "tool:docker",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "library:uvicorn",
"kind": "composed_of"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "role:ml-engineer",
"kind": "used_by_role"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "role:fullstack-engineer",
"kind": "used_by_role"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "role:frontend-engineer",
"kind": "used_by_role"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "workflow:prompt-engineering-iteration",
"kind": "follows_workflow"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "workflow:agent-evaluation-cycle",
"kind": "follows_workflow"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "domain:ml-ai",
"kind": "applies_to"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "domain:frontend",
"kind": "applies_to"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "skill-area:audio-processing",
"kind": "requires_skill_area"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "skill-area:streaming-realtime-processing",
"kind": "requires_skill_area"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "skill-area:websocket-design",
"kind": "requires_skill_area"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "skill-area:natural-language-processing",
"kind": "requires_skill_area"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "skill-area:model-serving-deployment",
"kind": "requires_skill_area"
}
],
"incomingEdges": []
}