# Docker Compose definition for the voice-chat service.
# Builds the image from the Dockerfile in this directory, exposes the app on
# port 8000, and reserves one NVIDIA GPU for the container.
services:
  voice-chat:
    build: .
    ports:
      # host:container — quoted so YAML never reinterprets the mapping
      - "8000:8000"
    volumes:
      # Cache models in a named volume so they survive container rebuilds
      - huggingface-cache:/cache/huggingface
      # LoRA adapters — drop .safetensors files into ./loras on the host,
      # reference them from config.yml as /cache/loras/<name>.safetensors
      - ./loras:/cache/loras
      # Avatar images uploaded via the web UI persist between restarts
      - ./avatars:/app/avatars
      # Mount source so you can edit code/config without rebuilding the image
      - ./config.yml:/app/config.yml:ro
      - ./configs:/app/configs:ro
      - ./server:/app/server:ro
      - ./static:/app/static:ro
      # NOTE(review): tests is the only source mount without :ro — presumably
      # so test runs can write caches/artifacts; confirm this is intentional
      - ./tests:/app/tests
      - ./run.py:/app/run.py:ro
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one GPU through the NVIDIA driver
            - driver: nvidia
              count: 1
              capabilities: [gpu]

volumes:
  # Named volume backing the model cache; empty mapping = default settings
  huggingface-cache: {}