arcadia-suite-sv/docker/litellm-config.yaml

# LiteLLM: unified LLM proxy for the Arcádia Suite
# Documentation: https://docs.litellm.ai/docs/proxy/configs
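#
# To run the proxy against this file (a sketch; the port and config path are
# assumptions, not pinned anywhere in this repo):
#   litellm --config docker/litellm-config.yaml --port 4000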
model_list:
  # ── OpenAI (when available) ─────────────────────────────────────────────
  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
  # ── Ollama (local LLMs, sovereignty) ────────────────────────────────────
  - model_name: llama3.3
    litellm_params:
      model: ollama/llama3.3
      api_base: os.environ/OLLAMA_BASE_URL
  - model_name: qwen2.5-coder
    litellm_params:
      model: ollama/qwen2.5-coder:7b
      api_base: os.environ/OLLAMA_BASE_URL
  - model_name: deepseek-r1
    litellm_params:
      model: ollama/deepseek-r1:7b
      api_base: os.environ/OLLAMA_BASE_URL
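  # The Ollama tags above must already exist on the Ollama host; a sketch of
  # pulling them there (tags taken from this file):
  #   ollama pull llama3.3
  #   ollama pull qwen2.5-coder:7b
  #   ollama pull deepseek-r1:7b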
  # ── Default model: try OpenAI, fall back to Ollama ──────────────────────
  - model_name: arcadia-default
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      fallbacks: ["llama3.3"]
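    # Note: LiteLLM treats model_info as free-form metadata; the fallback the
    # router actually enforces for this alias is the one declared under
    # router_settings.fallbacks below.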
router_settings:
  routing_strategy: least-busy
  fallbacks:
    - {"gpt-4o": ["llama3.3"]}
    - {"gpt-4o-mini": ["qwen2.5-coder"]}
    - {"arcadia-default": ["llama3.3"]}
litellm_settings:
  drop_params: true
  request_timeout: 120
  set_verbose: false
general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  database_url: os.environ/DATABASE_URL
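# Environment variables this config reads (values illustrative, not real):
#   OPENAI_API_KEY=sk-...                              # OpenAI models above
#   OLLAMA_BASE_URL=http://ollama:11434                # Ollama service (Docker hostname assumed)
#   LITELLM_MASTER_KEY=sk-arcadia-admin                # proxy auth key
#   DATABASE_URL=postgresql://litellm:litellm@db:5432/litellm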