# Container image for the inference-api service: a pip builder stage followed by a CUDA runtime stage.
# ── Builder ───────────────────────────────────────────────────────────────────
# Installs the feedback package and the service's Python dependencies with pip
# on a stock Python 3.11 image. The runtime stage copies the result out of
# /usr/local/lib/python3.11.
FROM python:3.11-slim AS builder

WORKDIR /build

# Install build tools
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install feedback package from source
COPY packages/content-moderation-feedback /build/packages/content-moderation-feedback
RUN pip install --no-cache-dir /build/packages/content-moderation-feedback

# Install service dependencies
COPY services/inference-api/requirements.txt /build/requirements.txt

# onnxruntime-gpu requires CUDA runtime; fall back to CPU-only in the builder.
# The GPU variant is expected to be present in the final runtime image.
#
# The GPU line is stripped into a separate requirements file that pip reads
# with -r, so comments, blank lines, environment markers and option lines are
# parsed by pip instead of being word-split by the shell. sed is used rather
# than `grep -v` because it still exits 0 when nothing is left after filtering.
# The version specifier is quoted: an unquoted `>` is a shell redirection and
# would install an unconstrained onnxruntime while creating a file `=1.17.0`.
RUN sed '/onnxruntime-gpu/d' /build/requirements.txt > /build/requirements.cpu.txt \
    && pip install --no-cache-dir \
        -r /build/requirements.cpu.txt \
        'onnxruntime>=1.17.0'

# ── Runtime ───────────────────────────────────────────────────────────────────
# Use NVIDIA CUDA base when GPU inference is needed.
# For CPU-only deployments the FROM line can be switched to `python:3.11-slim`,
# but the steps below target the Ubuntu layout (apt-provided interpreter,
# dist-packages). NOTE(review): re-validate the package copy and pip steps
# before relying on that switch.
# NOTE(review): ONNX Runtime's CUDA execution provider also needs cuDNN, which
# the plain `runtime` tag does not appear to include — confirm whether a
# `cudnn*-runtime` tag is required for GPU inference to actually engage.
FROM nvidia/cuda:12.3.1-runtime-ubuntu22.04 AS runtime

# Install Python in the CUDA image and point `python` / `python3` at 3.11,
# the minor version the builder installed its packages for.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3-pip \
        python3.11 \
        python3.11-venv \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.11 1

WORKDIR /app

# Copy installed packages from builder.
# Only site-packages is taken, and it lands in dist-packages, the directory the
# apt-installed interpreter searches under /usr/local. The builder's
# /usr/local/bin is deliberately not copied: it holds that image's own python
# binaries (which would shadow /usr/bin/python on PATH but cannot start here
# without their libpython) and console scripts whose shebangs point at them.
# Run tools as `python -m <module>` instead.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/dist-packages

# Install GPU-aware ONNX Runtime in place of the CPU version from builder.
# The CPU wheel is removed first because both wheels provide the same
# `onnxruntime` import package. It is reinstalled only if the GPU install
# fails; the original `A || echo && B` chain always ran the CPU install.
# Specifiers are quoted so `>` is not treated as a shell redirection.
RUN python3.11 -m pip uninstall -y onnxruntime \
    && if ! python3.11 -m pip install --no-cache-dir 'onnxruntime-gpu>=1.17.0'; then \
        echo "WARNING: onnxruntime-gpu install failed — falling back to CPU"; \
        python3.11 -m pip install --no-cache-dir 'onnxruntime>=1.17.0'; \
    fi

# Copy service source
COPY services/inference-api/ /app/

# Models directory is expected to be mounted at /app/models
# (matches APP_ROOT / "models" when APP_ROOT=/app)
VOLUME ["/app/models"]

# Feedback store persisted outside the container
VOLUME ["/app/data"]
ENV CM_FEEDBACK_STORE=/app/data/feedback.jsonl

EXPOSE 3501

ENV CM_API_HOST=0.0.0.0 \
    CM_API_PORT=3501 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app

# uvicorn receives host and port as literals here (exec form performs no
# variable expansion), so keep them in sync with CM_API_HOST / CM_API_PORT and
# EXPOSE above.
# NOTE(review): no USER is set, so the service runs as root. Moving to an
# unprivileged user requires /app/data (and any mounted volume) to be writable
# by that user.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "3501", "--workers", "1"]