43 lines
1.2 KiB
Docker
43 lines
1.2 KiB
Docker
# Base image: NVIDIA CUDA 12.8.0 with cuDNN, "devel" variant, on Ubuntu 22.04.
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04
|
|
|
|
# Suppress interactive package-configuration prompts during the build only.
# Declared as ARG rather than ENV: the value is still visible to every RUN step
# in this stage, but it is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
|
|
# Runtime environment for the container:
#   PYTHONUNBUFFERED=1  Python writes stdout/stderr without buffering.
#   HF_HOME             HuggingFace model cache location; meant to be mounted
#                       as a volume so downloaded models persist across runs.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/cache/huggingface
|
|
|
|
# System packages: Python 3.11 (interpreter, headers, venv) plus git, ffmpeg and curl.
# --no-install-recommends keeps optional "recommended" packages out of the image.
# ca-certificates is listed explicitly so HTTPS downloads (curl, git, pip) do not
# depend on it being pulled in as a recommendation or already present in the base.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Bootstrap pip for python3.11 (Debian disables ensurepip for system Python).
# The script is downloaded to a file and then executed instead of being piped
# straight into the interpreter: under the default /bin/sh (no pipefail) a failed
# download in `curl | python` would be masked by the exit status of the last command.
#   -f  fail on HTTP error responses instead of saving the error page
#   -L  follow redirects
# --no-cache-dir keeps pip's download cache out of the layer; the temporary
# script is deleted in the same layer for the same reason.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3.11 /tmp/get-pip.py --no-cache-dir \
    && rm -f /tmp/get-pip.py
|
|
|
|
# Point the bare `python` command at the Python 3.11 interpreter, replacing any
# existing /usr/bin/python entry.
RUN ln --symbolic --force /usr/bin/python3.11 /usr/bin/python
|
|
|
|
# Working directory for the remaining instructions; relative COPY destinations
# and the CMD below resolve against /app.
WORKDIR /app
|
|
|
|
# Install PyTorch 2.7+ with CUDA 12.8 support (includes Blackwell/sm_120 support).
# The explicit lower bound enforces the "2.7+" requirement stated above instead of
# relying on whatever version the index happens to serve.
# --index-url replaces the default index, so torch is resolved from the cu128 wheel index.
RUN python3.11 -m pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu128 \
        "torch>=2.7"
|
|
|
|
# Install auto-gptq (>= 0.7.1), preferring a pre-built CUDA 12.8 wheel so it does
# not have to be compiled from source.
# NOTE(review): --extra-index-url only adds an index; if no matching wheel is found
# there, pip can still resolve the package from the default index — confirm that
# the cu128 wheel index actually serves a compatible wheel.
RUN python3.11 -m pip install --no-cache-dir \
        --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu128/ \
        "auto-gptq>=0.7.1"
|
|
|
|
# Remaining Python dependencies. Only requirements.txt is copied at this point,
# so this layer is rebuilt when that file changes rather than on every source edit.
COPY requirements.txt ./
RUN python3.11 -m pip install --no-cache-dir --requirement requirements.txt
|
|
|
|
# Copy the build context into the working directory.
COPY . ./

# Document the container port (EXPOSE does not publish it).
# NOTE(review): presumably the port run.py listens on — confirm against the app.
EXPOSE 8000/tcp

# Default command: start run.py with Python 3.11 (exec form, no shell wrapper).
CMD ["python3.11", "run.py"]
|