"""Launcher script: configure CPU threading, then serve ``server.main:app``.

When executed as a script, starts Uvicorn on 0.0.0.0:8000 with auto-reload
disabled. Importing this module still applies the environment defaults and
the PyTorch thread caps below, but does not start the server.
"""
import os

# Cap CPU thread pools so PyTorch/OpenMP don't spin-wait on every core at idle.
# Models run on GPU; the CPU thread pool is only needed for small ops.
#
# These defaults are applied BEFORE torch is imported on purpose: native
# threading runtimes may read their environment when the shared library is
# loaded (e.g. GNU libgomp parses OMP_* at load time), so setting them after
# `import torch` can be silently ignored. `setdefault` keeps any value the
# operator already exported.
os.environ.setdefault("OMP_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("MKL_WAIT_POLICY", "PASSIVE")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Imported after the environment setup above; the late position is deliberate.
import torch  # noqa: E402
import uvicorn  # noqa: E402

# Limit both the intra-op and inter-op CPU thread pools to two threads each.
# Done at import time, before any parallel work has been started.
torch.set_num_threads(2)
torch.set_num_interop_threads(2)

if __name__ == "__main__":
    # The app is passed as an import string so Uvicorn loads it itself.
    uvicorn.run(
        "server.main:app",
        host="0.0.0.0",
        port=8000,
        reload=False,
        log_level="info",
    )