# GPU application image: CUDA 12.8 + cuDNN (devel) on Ubuntu 22.04 with
# Python 3.11, PyTorch (cu128 wheels), auto-gptq, and the app's requirements.
# The container starts `run.py` and documents port 8000.
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt from prompting during `docker build` without
# leaking the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Flush Python stdout/stderr immediately so logs show up in `docker logs`.
ENV PYTHONUNBUFFERED=1

# HuggingFace model cache — mounted as a volume so models persist across runs
ENV HF_HOME=/cache/huggingface

# Fail a RUN step when any command in a pipeline fails (needed for the
# `curl | python3.11` bootstrap below; plain /bin/sh would only report the
# exit status of the last command in the pipe).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System packages. ca-certificates is listed explicitly because
# --no-install-recommends no longer pulls it in implicitly, and curl/git/pip
# all need it for HTTPS. The apt lists are removed in the same layer so they
# do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for python3.11 (Debian disables ensurepip for system Python).
# -f makes curl exit non-zero on HTTP errors instead of piping an error page
# into the interpreter; -L follows redirects.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.11

# Make plain `python` resolve to Python 3.11.
RUN ln -sf /usr/bin/python3.11 /usr/bin/python

WORKDIR /app

# Install PyTorch 2.7+ with CUDA 12.8 support (includes Blackwell/sm_120 support).
# The version floor enforces what the comment promises.
RUN python3.11 -m pip install --no-cache-dir \
        "torch>=2.7" \
        --index-url https://download.pytorch.org/whl/cu128

# Install auto-gptq pre-built wheel for CUDA 12.8 (avoids compiling from source).
# NOTE(review): confirm this index actually publishes cu128 wheels; if it does
# not, pip will fall back to PyPI because this is an *extra* index.
RUN python3.11 -m pip install --no-cache-dir \
        "auto-gptq>=0.7.1" \
        --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu128/

# Install the rest of the app requirements. The manifest is copied on its own
# so this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN python3.11 -m pip install --no-cache-dir -r requirements.txt

# Application source last: it changes most often.
COPY . .

# NOTE(review): the image runs as root. Consider adding a dedicated USER once
# ownership of the mounted /cache/huggingface volume has been sorted out.

# Documentation only — the port must still be published at `docker run` time.
EXPOSE 8000

CMD ["python3.11", "run.py"]