# syntax=docker/dockerfile:1

# Base image reference. The default keeps the floating `latest` tag, which is
# not reproducible; override it with a released tag or a digest for stable
# builds, e.g.
#   docker build --build-arg BASE_IMAGE=quay.io/go-skynet/local-ai:<tag> .
ARG BASE_IMAGE=quay.io/go-skynet/local-ai:latest
FROM ${BASE_IMAGE}

# Use bash so heredoc-based RUN steps behave cleanly. Each RUN command string
# is handed to bash via -c, started as a login shell (-l).
SHELL ["/bin/bash", "-l", "-c"]

# Default settings stored in the image environment. CONTEXT_SIZE and THREADS
# are also referenced by the model YAML written later in this file.
ENV CONTEXT_SIZE="1024" \
    DEBUG="false" \
    MODELS_PATH="/models" \
    THREADS="2"

# Make sure the model directory exists (MODELS_PATH is set to /models above).
RUN mkdir -p "${MODELS_PATH}"

# Download a small chat model (TinyLlama 1.1B, Q4-quantized) at build time.
# The file is embedded in the final image, so no network connection is needed
# at run time.
# NOTE(review): the URL follows the `main` ref and the download is not
# checksum-verified; consider pinning a revision and checking a sha256.
RUN curl \
      --location \
      --fail \
      --progress-bar \
      --output /models/tinyllama-1.1b-chat.Q4_K_M.gguf \
      https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

# Generate the YAML model definition exposed under the name "gpt-oss-20b".
# The heredoc delimiter is deliberately left unquoted so that bash substitutes
# ${CONTEXT_SIZE} and ${THREADS} from the ENV defaults while the image builds;
# a quoted delimiter ('YAML') would write those placeholders literally into
# the file instead of numbers.
# NOTE(review): the substituted values are frozen at build time. Confirm the
# key names (n_ctx / n_threads) against the LocalAI model configuration docs.
RUN cat > /models/gpt-oss-20b.yaml <<YAML
name: gpt-oss-20b
backend: llama
parameters:
  model: tinyllama-1.1b-chat.Q4_K_M.gguf
  n_ctx: ${CONTEXT_SIZE}
  n_threads: ${THREADS}
  temperature: 0.2
  top_p: 0.9
YAML

# Server startup is driven by docker-compose via `command`