44a10667c2
- Added environment variables to prevent CPU thread pools from busy-waiting. - Deferred loading of video models until first use to reduce VRAM footprint. - Implemented streaming of speaking clips for improved responsiveness. - Introduced a queue for managing speaking clips to handle multiple requests smoothly. - Updated video playback logic to ensure proper handling of clip generation.
21 lines
578 B
Python
21 lines
578 B
Python
import os

# Cap CPU thread pools so PyTorch/OpenMP don't spin-wait on every core at idle.
# Models run on GPU; the CPU thread pool is only needed for small ops.
#
# The wait-policy variables must be exported BEFORE torch is imported: the
# OpenMP runtime reads OMP_WAIT_POLICY once, when it initialises, and that can
# happen as early as `import torch` loading its native libraries. Setting the
# variables afterwards may be silently ignored, leaving idle threads spinning.
# setdefault() preserves any value the operator has already exported.
os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("MKL_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

import torch  # noqa: E402  (deliberately after the environment setup above)
import uvicorn  # noqa: E402

# Limit both the intra-op and inter-op CPU pools to two threads each. These
# calls run at import time, before any parallel work has been scheduled.
torch.set_num_threads(2)
torch.set_num_interop_threads(2)
if __name__ == "__main__":
    # Script entry point: serve the ASGI app referenced by import string,
    # with auto-reload turned off.
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("server.main:app", **server_options)