Files
bhetherman 44a10667c2 Enhance video handling and performance optimizations
- Added environment variables to prevent CPU thread pools from busy-waiting.
- Deferred loading of video models until first use to reduce VRAM footprint.
- Implemented streaming of speaking clips for improved responsiveness.
- Introduced a queue for managing speaking clips to handle multiple requests smoothly.
- Updated video playback logic to ensure proper handling of clip generation.
2026-04-24 00:36:18 -04:00

21 lines
578 B
Python

"""Entry point: cap CPU thread pools, then serve ``server.main:app`` via uvicorn.

The thread-pool configuration below runs at import time; the server itself is
only started when this file is executed as a script.
"""

import os

# Cap CPU thread pools so PyTorch/OpenMP don't spin-wait on every core at idle.
# Models run on GPU; the CPU thread pool is only needed for small ops.
#
# Ordering matters: these variables are set BEFORE ``import torch`` because the
# native threading runtimes torch loads typically read their environment when
# they are loaded/initialised, so values exported afterwards may be ignored.
# ``setdefault`` keeps any value already exported by whoever launched us.
os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("MKL_WAIT_POLICY", "PASSIVE")
# Presumably consumed by the Hugging Face tokenizers library, if it is used
# downstream -- nothing in this file reads it directly.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Imported late on purpose (see the ordering note above).
import torch  # noqa: E402
import uvicorn  # noqa: E402

# Intra-op pool: threads used to parallelise a single CPU operator.
torch.set_num_threads(2)
# Inter-op pool: threads used to run independent operators concurrently.
# PyTorch requires this to be called before any inter-op parallel work starts,
# so it stays at import time, ahead of the server loading any model.
torch.set_num_interop_threads(2)

if __name__ == "__main__":
    # The app is passed as an import string, so uvicorn imports it itself.
    uvicorn.run(
        "server.main:app",
        host="0.0.0.0",  # listen on all interfaces
        port=8000,
        reload=False,
        log_level="info",
    )