# GPU application image: CUDA devel toolchain, Python 3.11, and a PyTorch stack
# compiled from source so that Blackwell (sm_120) kernels are included.
#
# The base must ship CUDA >= 12.8: that is the first toolkit whose nvcc accepts
# compute capability 12.0, which TORCH_CUDA_ARCH_LIST below requests.
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04

# Build-time only, so apt never prompts but the setting does not leak into the
# environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Upstream branch or tag to build for each project. The defaults follow the
# development branches; pass --build-arg to pin a release for reproducibility.
ARG PYTORCH_REF=main
ARG TORCHVISION_REF=main
ARG TORCHAUDIO_REF=main

# GPU architectures compiled into every CUDA extension built in this image.
# Declared as ARG so it is visible to the RUN steps below without persisting
# at runtime.
ARG TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0;9.0a;12.0"

ENV PYTHONUNBUFFERED=1

# HuggingFace model cache — mounted as a volume so models persist across runs
ENV HF_HOME=/cache/huggingface

# OS packages: Python 3.11, the native build toolchain, and ffmpeg.
# libjpeg-dev / libpng-dev let the torchvision source build enable its image
# codecs; ca-certificates is listed explicitly because recommended packages
# are skipped and the HTTPS downloads below need it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        ffmpeg \
        git \
        libjpeg-dev \
        libpng-dev \
        ninja-build \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for python3.11 (Debian disables ensurepip for system Python).
# Downloading to a file with -f makes an HTTP error fail the build instead of
# feeding an error page to the interpreter.
RUN curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python3.11 /tmp/get-pip.py --no-cache-dir \
    && rm -f /tmp/get-pip.py

RUN ln -sf /usr/bin/python3.11 /usr/bin/python

WORKDIR /app

# Build PyTorch from source with Blackwell (sm_120) support.
# Clone, build, and cleanup share one layer so the source tree never ends up
# in the image. PyTorch's own requirements.txt provides the Python-side build
# dependencies, which --no-build-isolation expects to be installed already.
RUN git clone --depth 1 --branch "${PYTORCH_REF}" \
        https://github.com/pytorch/pytorch.git /tmp/pytorch \
    && git -C /tmp/pytorch submodule update --init --recursive \
    && python3.11 -m pip install --no-cache-dir -r /tmp/pytorch/requirements.txt \
    && python3.11 -m pip install --no-cache-dir --no-build-isolation -v /tmp/pytorch \
    && rm -rf /tmp/pytorch

# Build torchvision and torchaudio against the torch installed above.
# Prebuilt wheels from download.pytorch.org pin an exact torch release, so
# installing them would make pip replace the source build (and drop sm_120).
# --no-deps keeps pip from touching torch; the remaining runtime dependencies
# are installed explicitly. FORCE_CUDA / USE_CUDA turn on the CUDA extensions
# even though no GPU is visible during `docker build`.
RUN python3.11 -m pip install --no-cache-dir numpy pillow \
    && FORCE_CUDA=1 USE_CUDA=1 \
       python3.11 -m pip install --no-cache-dir --no-build-isolation --no-deps \
        "git+https://github.com/pytorch/vision.git@${TORCHVISION_REF}" \
        "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_REF}"

# Install auto-gptq pre-built wheel for CUDA 12.1 (avoids compiling from source)
# NOTE(review): these wheels were built for cu121 and a released torch; confirm
# that their CUDA kernels load against the source-built torch on the target GPU.
RUN python3.11 -m pip install --no-cache-dir \
        "auto-gptq>=0.7.1" \
        --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu121/

# Install the rest of the app requirements. The manifest is copied on its own
# so this layer is reused until requirements.txt itself changes.
# NOTE(review): if requirements.txt pins torch / torchvision / torchaudio, pip
# will replace the source builds above — verify the file does not.
COPY requirements.txt .
RUN python3.11 -m pip install --no-cache-dir -r requirements.txt

COPY . .

# Documentation only; publish the port with `docker run -p`.
EXPOSE 8000

# NOTE(review): the container runs as root; consider adding a dedicated USER
# once ownership of the /cache/huggingface volume has been sorted out.
CMD ["python3.11", "run.py"]