Enhance video handling and performance optimizations

- Added environment variables to prevent CPU thread pools from busy-waiting.
- Deferred loading of video models until first use to reduce VRAM footprint.
- Implemented streaming of speaking clips for improved responsiveness.
- Introduced a queue for managing speaking clips to handle multiple requests smoothly.
- Updated video playback logic to ensure proper handling of clip generation.
2026-04-24 00:36:18 -04:00
parent 129df7d1fa
commit 44a10667c2
7 changed files with 234 additions and 69 deletions
@@ -1,5 +1,15 @@
+import os
 import uvicorn

+# Ask OpenMP/MKL worker threads to sleep rather than spin-wait when idle. These
+# must be set BEFORE torch is imported: the runtime may read them at load time.
+os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")
+os.environ.setdefault("MKL_WAIT_POLICY", "PASSIVE")
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+import torch  # noqa: E402  (deliberately imported after the env setup above)
+torch.set_num_threads(2)  # models run on GPU; CPU pool only serves small ops
+torch.set_num_interop_threads(2)
+
 if __name__ == "__main__":
    uvicorn.run(
        "server.main:app",