Enhance video handling and performance optimizations

- Added environment variables to prevent CPU thread pools from busy-waiting.
- Deferred loading of video models until first use to reduce VRAM footprint.
- Implemented streaming of speaking clips for improved responsiveness.
- Introduced a queue for managing speaking clips to handle multiple requests smoothly.
- Updated video playback logic to ensure proper handling of clip generation.
This commit is contained in:
2026-04-24 00:36:18 -04:00
parent 129df7d1fa
commit 44a10667c2
7 changed files with 234 additions and 69 deletions
+10
View File
@@ -1,5 +1,15 @@
import os

# Configure CPU threading *before* importing torch. OpenMP runtimes read
# their wait-policy variables when they initialize, which can happen as soon
# as `import torch` loads them; values exported after that point may be
# silently ignored and idle worker threads would keep spin-waiting.
# Models run on GPU; the CPU thread pool is only needed for small ops.
# setdefault() leaves any value already exported by the operator untouched.
os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("MKL_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

import torch  # noqa: E402  (must follow the environment setup above)
import uvicorn  # noqa: E402

# Cap both the intra-op and inter-op CPU thread pools at two threads each.
# The inter-op limit is set here at startup, before any parallel work runs.
torch.set_num_threads(2)
torch.set_num_interop_threads(2)
if __name__ == "__main__":
uvicorn.run(
"server.main:app",