Configuration Reference
Complete reference for config.yaml. All fields with their types, defaults, and descriptions.
Server
yaml
server:
host: 0.0.0.0 # Bind address
port: 8090 # Listen port
timeout: 30s # Request timeout
keep_alive: 60s # Keep-alive duration
tls:
enabled: false # Enable TLS
cert_file: "" # Path to TLS certificate
key_file: "" # Path to TLS private key
min_version: "1.2" # Minimum TLS version
auth:
enabled: true # Require API key authentication
api_keys_env: "LLM_PROXY_API_KEYS" # Env var with comma-separated keys
metrics:
enabled: false # Enable dedicated metrics port
port: 9091 # Metrics port
admin:
enabled: true # Enable admin API
port: 8081 # Admin port
vllm:
enabled: false # Enable local vLLM integration
model_path: "" # Local model path
fallback_threshold: 0.1 # Budget threshold to fall back to local
Security
yaml
security:
enabled: true # Enable security pipeline
max_payload_size_kb: 512 # Maximum request body size
max_messages: 50 # Maximum messages per request
link_sanitization:
enabled: true # Enable URL sanitization
blocked_domains: [] # Domains to block
Identity
yaml
identity:
enabled: false # Enable SSO/JWT authentication
default_role: "user" # Default role for new users
providers: # OIDC providers
- name: google
client_id_env: "OIDC_GOOGLE_CLIENT_ID"
- name: microsoft
client_id_env: "OIDC_MICROSOFT_CLIENT_ID"
- name: apple
client_id_env: "OIDC_APPLE_CLIENT_ID"
role_mappings: {} # email → role mappings
session_ttl: 3600 # Session token TTL (seconds)
Endpoints
yaml
endpoints:
<name>:
provider: "<provider>" # Provider adapter name
base_url: "<url>" # Provider API base URL
api_key_env: "<env>" # Environment variable for API key
models: [] # Available models
rate_limit: # Optional rate limits
rpm: 3500 # Requests per minute
tpm: 60000 # Tokens per minute
Fallback Chains
yaml
fallback_chains:
"<model>": # Primary model name
- provider: "<provider>" # Fallback provider
model: "<model>" # Fallback model
Model Aliases
yaml
model_aliases:
"<alias>": "<real-model-id>"
Model Groups
yaml
model_groups:
"<group-name>":
strategy: "cheapest" # cheapest, fastest, weighted, random
models:
- model: "<model>"
provider: "<provider>"
weight: 0.5 # For weighted strategy
Rotation
yaml
rotation:
strategy: "round_robin" # round_robin, weighted, least_used, random
failover:
enabled: true
max_retries: 3
retry_delay: 1s
switch_on_status: [429, 500, 503]
Logging
yaml
logging:
level: "info" # debug, info, warning, error
format: "json" # json or text
output: "" # Log file path (empty = stdout)
audit_trail:
enabled: true # Enable persistent audit log
mask_pii: true # Mask PII in audit entries
Caching
yaml
caching:
enabled: true
db_path: "cache.db" # SQLite cache database path
ttl: 3600 # Cache TTL (seconds)
eviction_interval: 3600 # Eviction check interval
negative_cache:
maxsize: 50000 # Max negative cache entries
ttl: 300 # Negative cache TTL
Observability
yaml
observability:
tracing:
enabled: true
service_name: "llmproxy" # OpenTelemetry service name
otlp_endpoint: null # OTLP collector endpoint
console_exporter: true # Print traces to console
sentry:
dsn_env: "SENTRY_DSN" # Sentry DSN environment variable
export:
enabled: false
output_dir: "exports" # JSONL export directory
scrub_pii: true # Remove PII from exports
compress_on_rotate: true # Gzip on daily rotation
Webhooks
yaml
webhooks:
enabled: false
endpoints:
- name: "<name>"
target: "<type>" # slack, teams, discord, generic
url_env: "<env>" # Webhook URL environment variable
events: [] # Event types to send
Event types: circuit_open, budget_threshold, injection_blocked, endpoint_down, endpoint_recovered, auth_failure, panic_activated
Budget
yaml
budget:
daily_limit: 50.0 # Hard daily cap (USD)
soft_limit: 40.0 # Warning threshold (USD)
fallback_to_local_on_limit: true # Use local LLM when exhausted
Rate Limiting
yaml
rate_limiting:
enabled: true
requests_per_minute: 60 # Global rate limit