# Mirror of https://github.com/informaticker/vllm-fly.git
# Synced 2024-10-18 20:25:03 +02:00
# 26 lines, 344 B, TOML
# Root-level settings: application name and primary region.
# NOTE(review): keys match the Fly.io fly.toml schema; "ord" is presumably a
# Fly.io region code — confirm.
app = "vllm-demo"
primary_region = "ord"

# Use /start.sh as the entrypoint. The same path is the guest_path of the
# [[files]] entry in this file.
[experimental]
entrypoint = "/start.sh"

# Image to run; the tag is pinned to v0.4.2.
[build]
image = "vllm/vllm-openai:v0.4.2"

# Mount the "models" source at /root/.cache/huggingface.
# NOTE(review): presumably a persistent volume so downloaded models survive
# restarts — confirm the volume exists in the primary region.
[[mounts]]
source = "models"
destination = "/root/.cache/huggingface"

# HTTP service on internal port 8000 with HTTPS forced.
# NOTE(review): 8000 presumably matches the port the server launched by
# start.sh listens on — confirm.
[http_service]
internal_port = 8000
force_https = true

# Machine sizing: the "l40s" size with one GPU.
[[vm]]
size = "l40s"
gpus = 1

# Place the local start.sh at /start.sh in the guest; that path is the
# entrypoint configured under [experimental].
[[files]]
guest_path = "/start.sh"
local_path = "start.sh"