content-moderation/services/inference-api/Dockerfile
2026-03-13 04:13:49 -07:00

65 lines
2.6 KiB
Docker

# ── Builder ───────────────────────────────────────────────────────────────────
# Installs the feedback package and the service's Python dependencies into the
# python:3.11-slim interpreter under /usr/local, for the runtime stage to copy.
FROM python:3.11-slim AS builder
WORKDIR /build
# Install build tools
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
# Install feedback package from source
COPY packages/content-moderation-feedback /build/packages/content-moderation-feedback
RUN pip install --no-cache-dir /build/packages/content-moderation-feedback
# Install service dependencies
COPY services/inference-api/requirements.txt /build/requirements.txt
# onnxruntime-gpu requires CUDA runtime; fall back to CPU-only in the builder.
# The GPU variant is expected to be present in the final runtime image.
#
# The requirements are filtered into a file and installed with `-r` so that
# version specifiers, extras and environment markers are parsed by pip rather
# than word-split/globbed by the shell. The extra specifier is quoted because
# an unquoted `>` is a shell redirection: `onnxruntime>=1.17.0` would install
# an unconstrained onnxruntime and write pip's output to a file named `=1.17.0`.
RUN sed '/onnxruntime-gpu/d' /build/requirements.txt > /build/requirements-cpu.txt \
    && pip install --no-cache-dir \
        -r /build/requirements-cpu.txt \
        'onnxruntime>=1.17.0'
# ── Runtime ───────────────────────────────────────────────────────────────────
# GPU inference needs the NVIDIA CUDA base image used here.
# For CPU-only deployments, change the FROM line to `python:3.11-slim`.
FROM nvidia/cuda:12.3.1-runtime-ubuntu22.04 AS runtime
# Add Python 3.11 (interpreter, venv support, pip) to the CUDA image and drop
# the apt package lists in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.11 \
        python3.11-venv \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Register /usr/bin/python3.11 as the alternative behind both /usr/bin/python3
# and /usr/bin/python; stop at the first registration that fails.
RUN for alias in python3 python; do \
        update-alternatives --install "/usr/bin/${alias}" "${alias}" /usr/bin/python3.11 1 || exit 1; \
    done
WORKDIR /app
# Copy installed packages from builder
# NOTE(review): /usr/local/bin from the builder presumably also carries that
# image's own python/pip launchers, which would precede /usr/bin on a default
# PATH — confirm that the `pip` and `python` resolved in this image run on this
# base and import from the copied /usr/local/lib/python3.11 tree.
COPY --from=builder /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=builder /usr/local/bin /usr/local/bin
# Install GPU-aware ONNX Runtime over the CPU version from builder.
# An explicit if/else is used because `a || b && c` parses as `(a || b) && c`,
# which would run the CPU install even after a successful GPU install.
# Specifiers are quoted so `>=` reaches pip instead of being treated as a
# shell redirection to a file named `=1.17.0`.
RUN if pip install --no-cache-dir 'onnxruntime-gpu>=1.17.0'; then \
        echo "onnxruntime-gpu installed"; \
    else \
        echo "WARNING: onnxruntime-gpu install failed — falling back to CPU" >&2 \
        && pip install --no-cache-dir 'onnxruntime>=1.17.0'; \
    fi
# Bring the service source tree into the working directory
COPY services/inference-api/ /app/
# Mount points supplied from outside the container:
#   /app/models — models directory, expected to be mounted here
#                 (matches APP_ROOT / "models" when APP_ROOT=/app)
#   /app/data   — feedback store, persisted outside the container
VOLUME ["/app/models", "/app/data"]
# Service configuration and Python runtime settings
ENV CM_FEEDBACK_STORE=/app/data/feedback.jsonl \
    CM_API_HOST=0.0.0.0 \
    CM_API_PORT=3501 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app
EXPOSE 3501
# NOTE(review): no USER instruction appears in this stage, so the process runs
# as the base image's default user — confirm whether a non-root user is wanted.
# Host and port are repeated literally below because exec-form CMD performs no
# environment variable expansion.
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "3501", "--workers", "1"]