app = "vllm-demo" primary_region = "ord" [experimental] entrypoint = "/start.sh" [build] image = "vllm/vllm-openai:latest" [http_service] internal_port = 8000 force_https = true [[vm]] size = 'l40s' gpus = 1 [[files]] guest_path = "/start.sh" local_path = "start.sh" [[services]] internal_port = 8000 protocol = "tcp" auto_stop_machines = "stop" auto_start_machines = true min_machines_running = 0