first

2025-08-16 16:26:36 +02:00 · 2025-08-16 16:26:36 +02:00 · 6c9c3f663b
commit 6c9c3f663b
2 changed files with 48 additions and 0 deletions
--- a/30
+++ b/30
@ -0,0 +1,30 @@
+# Base image tag is overridable (--build-arg LOCALAI_TAG=<tag>) so builds can be pinned.
+ARG LOCALAI_TAG=latest
+FROM quay.io/go-skynet/local-ai:${LOCALAI_TAG}
+# Environment defaults; THREADS and CONTEXT_SIZE are also read when the model config is generated.
+ENV MODELS_PATH=/models DEBUG=false \
+    THREADS=2 CONTEXT_SIZE=1024
+
+# Create the directory that holds the model files.
+RUN mkdir -p /models
+
+# Fetch a small chat model (TinyLlama 1.1B, Q4-quantized) at build time.
+# The file is embedded in the final image, so no network access is needed
+# at run time.
+RUN curl --location --fail --progress-bar \
+      --output /models/tinyllama-1.1b-chat.Q4_K_M.gguf \
+      https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
+
+# Alias "gpt-oss-20b" (the name Symfony references) to the downloaded file; values are fixed at build time.
+# One printf argument per YAML line: a RUN instruction cannot contain raw newlines inside a quoted string.
+RUN printf '%s\n' \
+    'name: gpt-oss-20b' \
+    'backend: llama' \
+    'parameters:' \
+    '  model: tinyllama-1.1b-chat.Q4_K_M.gguf' \
+    "  n_ctx: ${CONTEXT_SIZE}" \
+    "  n_threads: ${THREADS}" \
+    '  temperature: 0.2' \
+    '  top_p: 0.9' > /models/gpt-oss-20b.yaml
+
+# Server entrypoint: no CMD is set here because docker-compose supplies the arguments via `command`.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,18 @@
+version: '3.9'
+services:
+  localai:
+    container_name: localai
+    build: .
+    command: ["--models-path", "/models", "--address", "0.0.0.0:8080"]
+    ports:
+      - "8080:8080"
+    environment:
+      MODELS_PATH: "/models"
+      THREADS: "2"            # tune to your core count (light default)
+      CONTEXT_SIZE: "1024"    # reduced context for testing
+      DEBUG: "false"
+    healthcheck:              # probes the /v1/models endpoint with curl
+      retries: 10
+      interval: 10s
+      timeout: 5s
+      test: ["CMD", "curl", "-fsS", "http://localhost:8080/v1/models"]