mirror of
https://github.com/informaticker/vllm-fly.git
synced 2024-11-22 09:41:58 +01:00
add README
This commit is contained in:
parent
0066470e85
commit
1e42a6b6b3
43
README.md
Normal file
43
README.md
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# [vLLM](https://github.com/vllm-project/vllm) demo app for Fly.io
|
||||||
|
|
||||||
|
|
||||||
|
First deploy with:
|
||||||
|
```
|
||||||
|
fly launch
|
||||||
|
```
|
||||||
|
|
||||||
|
From there, update by running: `fly deploy`
|
||||||
|
|
||||||
|
Once deployed, interact with the API at https://$APPNAME.fly.dev/
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
❯ curl https://vllm-demo.fly.dev/v1/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "facebook/opt-125m",
|
||||||
|
"prompt": "San Francisco is a",
|
||||||
|
"max_tokens": 7,
|
||||||
|
"temperature": 0
|
||||||
|
}' -s |jq .
|
||||||
|
{
|
||||||
|
"id": "cmpl-b4b03ec33d794a50ba5cf2801d807025",
|
||||||
|
"object": "text_completion",
|
||||||
|
"created": 1716250075,
|
||||||
|
"model": "facebook/opt-125m",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"text": " great place to live. I",
|
||||||
|
"logprobs": null,
|
||||||
|
"finish_reason": "length",
|
||||||
|
"stop_reason": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 5,
|
||||||
|
"total_tokens": 12,
|
||||||
|
"completion_tokens": 7
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
Loading…
Reference in New Issue
Block a user