---
# Compose definition for the voice-chat service: one container built from
# this directory, published on port 8000, with one NVIDIA GPU reserved.
services:
  voice-chat:
    # Build the image using the current directory as the build context.
    build: .
    ports:
      # host:container — quoted so the mapping is always read as a string.
      - "8000:8000"
    volumes:
      # Keep downloaded models in a named volume so they outlive container
      # rebuilds instead of being fetched again each time.
      # NOTE(review): assumes the image uses /cache/huggingface as its
      # Hugging Face cache directory (e.g. via HF_HOME) — confirm in the
      # Dockerfile.
      - huggingface-cache:/cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single GPU through the NVIDIA driver.
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

# Named volume backing the model cache mount above; no options are set.
volumes:
  huggingface-cache: