# Base: NVIDIA CUDA 12.1.1 + cuDNN 8 "devel" image on Ubuntu 22.04.
# NOTE(review): the PyTorch build later in this file requests compute
# capability 12.0; confirm that the CUDA 12.1 toolkit's nvcc accepts that
# architecture (support is understood to start with CUDA 12.8) — TODO verify.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during image builds.
# Declared as ARG rather than ENV so it is visible to the RUN steps of this
# stage but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED - write Python stdout/stderr straight through, unbuffered.
#   HF_HOME          - HuggingFace model cache, mounted as a volume so models
#                      persist across runs.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/cache/huggingface

# System packages: the Python 3.11 interpreter/headers/venv, native build
# tooling (build-essential, cmake, ninja-build), git and curl for fetching
# sources, and ffmpeg.
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is therefore listed explicitly so HTTPS downloads by curl,
# git and pip do not depend on it arriving as a recommended package.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    ffmpeg \
    git \
    ninja-build \
    python3.11 \
    python3.11-dev \
    python3.11-venv \
    && rm -rf /var/lib/apt/lists/*

# Bootstrap pip for python3.11 (Debian disables ensurepip for system Python).
# The script is saved to a file with `curl -f` instead of being piped into the
# interpreter: with the default /bin/sh there is no pipefail, so a failed
# download in `curl | python3.11` would feed python an empty stdin, exit 0 and
# silently leave the image without pip. Here any HTTP or network error aborts
# the build, and the temporary script is removed in the same layer.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
    python3.11 /tmp/get-pip.py --no-cache-dir && \
    rm -f /tmp/get-pip.py

# Make the bare `python` command resolve to the 3.11 interpreter.
RUN ln -sf /usr/bin/python3.11 /usr/bin/python

# Working directory for the application and for every later relative path.
WORKDIR /app

# Build PyTorch from source with Blackwell (sm_120) support.
#
# PYTORCH_REF        Branch or tag to build. Defaults to "main", which matches
#                    the previous behaviour of cloning the default branch; pass
#                    a release tag (--build-arg PYTORCH_REF=...) for a
#                    reproducible image.
# PYTORCH_CUDA_ARCHS Semicolon-separated compute capabilities handed to the
#                    build as TORCH_CUDA_ARCH_LIST.
#
# NOTE(review): arch 12.0 needs an nvcc that recognises compute_120; confirm
# the CUDA toolkit in the base image supports it (believed to require
# CUDA >= 12.8), otherwise drop 12.0 here or move to a newer base image.
ARG PYTORCH_REF=main
ARG PYTORCH_CUDA_ARCHS="5.0;6.0;7.0;7.5;8.0;8.6;9.0;9.0a;12.0"

# Clone, build and clean up in a single layer so the source tree and build
# artefacts under /tmp/pytorch never persist in the image (this is also why
# `cd` is used here instead of WORKDIR).
# PyTorch's own requirements.txt is installed first: the build invokes Python
# code generators that import packages which get-pip.py does not provide.
RUN git clone --depth 1 --branch "${PYTORCH_REF}" https://github.com/pytorch/pytorch.git /tmp/pytorch && \
    cd /tmp/pytorch && \
    git submodule update --init --recursive && \
    python3.11 -m pip install --no-cache-dir -r requirements.txt && \
    TORCH_CUDA_ARCH_LIST="${PYTORCH_CUDA_ARCHS}" \
    python3.11 setup.py install && \
    cd / && rm -rf /tmp/pytorch

# Build torchvision and torchaudio from source against the PyTorch that is
# already installed in this image.
#
# Why not the prebuilt cu121 wheels: those wheels declare a dependency on one
# exact stock torch release, so `pip install torchvision torchaudio` from
# download.pytorch.org/whl/cu121 makes pip uninstall the source-built PyTorch
# and replace it with the stock cu121 wheel, discarding the custom arch list.
#
# --no-build-isolation  compile against the installed torch instead of a
#                       throw-away build environment.
# FORCE_CUDA=1          no GPU is visible during `docker build`; this asks
#                       torchvision to compile its CUDA ops anyway.
# TORCH_CUDA_ARCH_LIST  kept identical to the list used for the PyTorch build.
#
# TORCHVISION_REF / TORCHAUDIO_REF select the branch or tag to build; when the
# PyTorch build is pinned to a release, pin these to the matching releases.
# NOTE(review): assumes the upstream source builds depend on an unpinned
# `torch`, so pip keeps the installed build — verify on first build.
ARG TORCHVISION_REF=main
ARG TORCHAUDIO_REF=main
RUN FORCE_CUDA=1 \
    TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0;9.0a;12.0" \
    python3.11 -m pip install --no-cache-dir --no-build-isolation \
    "git+https://github.com/pytorch/vision.git@${TORCHVISION_REF}" \
    "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_REF}"

# auto-gptq (>= 0.7.1): pip is pointed at the AutoGPTQ cu121 wheel index in
# addition to PyPI so that a pre-built CUDA 12.1 wheel can be picked up
# instead of compiling the package from source.
# NOTE(review): presumably these wheels were built against a stock torch
# release; verify they load with the PyTorch installed in this image.
RUN python3.11 -m pip install \
    --no-cache-dir \
    --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu121/ \
    "auto-gptq>=0.7.1"

# Remaining application dependencies. requirements.txt is copied on its own,
# ahead of the rest of the source, so this layer is rebuilt only when the
# file itself changes. The absolute paths are equivalent to the /app working
# directory set earlier.
COPY requirements.txt /app/requirements.txt
RUN python3.11 -m pip install --no-cache-dir --requirement /app/requirements.txt

# Application source; the destination is the /app working directory.
COPY . /app/

# Documents the port the service presumably listens on; EXPOSE does not
# publish it (use `docker run -p` for that).
EXPOSE 8000/tcp

# Exec-form command: run.py is started directly with python3.11, without a
# wrapping shell.
# NOTE(review): no USER is set, so the process runs as root — consider a
# dedicated non-root user once ownership of the model cache volume is settled.
CMD ["python3.11", "run.py"]