version: "3.9" services: localai: build: context: . dockerfile: Dockerfile container_name: localai ports: - "8085:8080" # accès direct: http://:8085 environment: - MODELS_PATH=/models - THREADS=8 # adapte au nombre de cœurs - CONTEXT_SIZE=2048 # augmente si tu as plus de RAM - DEBUG=false - DISABLE_DOWNLOAD=false # laisse LocalAI récupérer le backend llama-cpp au 1er run command: ["--models-path", "/models", "--address", "0.0.0.0:8080"] healthcheck: test: ["CMD", "curl", "-fsS", "http://localhost:8080/v1/models"] interval: 10s timeout: 5s retries: 10 restart: unless-stopped