diff --git a/Dockerfile b/Dockerfile
index 82c05c6..1becd34 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,8 @@
+# Variant 1 (your current base image)
 FROM quay.io/go-skynet/local-ai:latest
 
-# Utiliser bash pour les heredocs propres
-SHELL ["/bin/bash", "-lc"]
+# (optional) if the image does not ship curl, which the model download below needs:
+# RUN apk add --no-cache curl || (apt-get update && apt-get install -y curl ca-certificates)
 
 ENV MODELS_PATH=/models \
     DEBUG=false \
@@ -11,15 +12,15 @@ ENV MODELS_PATH=/models \
 # Crée le dossier des modèles
 RUN mkdir -p /models
 
-# Télécharge un **petit modèle chat** (TinyLlama 1.1B, quantisé Q4) au **build**
-# -> Il sera **embarqué** dans l'image finale. Aucune connexion réseau n'est
-#    nécessaire **à l'exécution**.
+# Downloads a small chat model (TinyLlama 1.1B, Q4-quantized) AT BUILD TIME
+# => Baked into the image; no network access is needed at runtime for this file.
 RUN curl -L --fail --progress-bar \
   -o /models/tinyllama-1.1b-chat.Q4_K_M.gguf \
   https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
 
-# Génère le mapping YAML du modèle exposé sous le nom "gpt-oss-20b"
-RUN cat > /models/gpt-oss-20b.yaml <<'YAML'
+# IMPORTANT: the heredoc delimiter is left UNQUOTED (<<YAML) so that variables are EXPANDED
+# (n_ctx/n_threads take the CONTEXT_SIZE/THREADS values AT BUILD TIME; they are not re-read at runtime)
+RUN cat > /models/gpt-oss-20b.yaml <<YAML
 name: gpt-oss-20b
 backend: llama
 parameters:
@@ -30,4 +31,4 @@ parameters:
   top_p: 0.9
 YAML
 
-# Le démarrage du serveur est piloté par docker-compose via `command`
\ No newline at end of file
+# Server startup is driven by docker-compose via `command`
diff --git a/docker-compose.yml b/docker-compose.yml
index 060a770..ed23195 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,19 +1,23 @@
 version: "3.9"
+
 services:
   localai:
-    build: .
+    build:
+      context: .
+      dockerfile: Dockerfile
     container_name: localai
     ports:
-      - "8085:8080"
+      - "8085:8080"              # direct access: http://<host>:8085
     environment:
       - MODELS_PATH=/models
-      - THREADS=8          # ajustez selon vos cores (léger par défaut)
-      - CONTEXT_SIZE=2048  # contexte réduit pour tests
+      - THREADS=8                # match your core count; NOTE(review): the model YAML fixes n_threads at build - confirm this runtime value is used
+      - CONTEXT_SIZE=2048        # raise if you have more RAM; NOTE(review): the model YAML fixes n_ctx at build - confirm this runtime value is used
       - DEBUG=false
-      - DISABLE_DOWNLOAD=false
+      - DISABLE_DOWNLOAD=false   # lets LocalAI fetch the llama-cpp backend on first run
     command: ["--models-path", "/models", "--address", "0.0.0.0:8080"]
     healthcheck:
       test: ["CMD", "curl", "-fsS", "http://localhost:8080/v1/models"]
       interval: 10s
       timeout: 5s
-      retries: 10
\ No newline at end of file
+      retries: 10
+    restart: unless-stopped