commit af38cda43e499e90abf672d6d21b1da19925df40 Author: elijah Date: Fri Jul 19 00:10:34 2024 +0200 Initial commit diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5585952 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# Use an official Python runtime as the base image +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file into the container +COPY requirements.txt . + +# Install the required packages +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code +COPY . . + +# Make port 5000 available to the world outside this container +EXPOSE 5000 + +# Define environment variable for Flask +ENV FLASK_APP=app.py + +# Run the application +CMD ["gunicorn", "-b", "0.0.0.0:5000", "app:app"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..0fb4b31 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# cchat +language model interface + +## Docker +To run the application in a docker container, first customize the docker-compose file. 
+
+Then run the following command:
+```bash
+docker compose up -d
+```
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..62fca21
--- /dev/null
+++ b/app.py
@@ -0,0 +1,98 @@
+from flask import Flask, request, jsonify, render_template, Response, stream_with_context
+from transformers import AutoTokenizer
+import os
+import requests
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+
+app = Flask(__name__)
+
+# Initialize rate limiter
+limiter = Limiter(
+    get_remote_address,
+    app=app,
+    storage_uri="memory://"
+)
+
+# Load the tokenizer once; TOKENIZER selects the model (defaults to gpt2)
+tokenizer = AutoTokenizer.from_pretrained(os.environ.get('TOKENIZER', 'gpt2'))
+
+api_url = os.environ.get('API_URL', 'https://api.openai.com/v1')
+api_key = os.environ.get('API_KEY')
+api_model = os.environ.get('API_MODEL', 'gpt-3.5-turbo')
+# Environment values are strings, but the request body needs a JSON number
+temperature = float(os.environ.get('TEMPERATURE', 0))
+
+@app.route('/v1/tokenizer/count', methods=['POST'])
+def token_count():
+    """Count the tokens in the posted chat messages.
+
+    Expects a JSON object with an optional "messages" list of
+    {"role": ..., "content": ...} items and responds with
+    {"token_count": <int>}; any other body yields a 400 error.
+    """
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+
+    messages = data.get('messages', [])
+    full_text = " ".join(f"{msg['role']}: {msg['content']}" for msg in messages)
+    return jsonify({"token_count": len(tokenizer.encode(full_text))})
+
+@app.route('/v1/chat/completions', methods=['POST'])
+@limiter.limit(os.environ.get('RATE_LIMIT', '20/minute'))
+def proxy_chat_completions():
+    """Forward a chat completion request upstream and stream the reply back.
+
+    The model, temperature and streaming flag are set from server
+    configuration, overriding whatever the client sent.
+    """
+    request_data = request.get_json(silent=True)
+    if not isinstance(request_data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+
+    headers = {
+        'Authorization': f'Bearer {api_key}',
+        'Content-Type': 'application/json'
+    }
+
+    request_data['model'] = api_model
+    request_data['temperature'] = temperature
+    request_data['stream'] = True
+
+    try:
+        response = requests.post(f"{api_url}/chat/completions",
+                                 json=request_data,
+                                 headers=headers,
+                                 stream=True,
+                                 timeout=(10, 300))
+    except requests.RequestException:
+        return jsonify({"error": "Upstream request failed"}), 502
+
+    # Stream the response back to the client
+    def generate():
+        try:
+            # chunk_size=None forwards data as it arrives instead of 8 bytes at a time
+            for chunk in response.iter_content(chunk_size=None):
+                if chunk:
+                    yield chunk
+        finally:
+            # Release the upstream connection even if the client disconnects
+            response.close()
+
+    return Response(stream_with_context(generate()),
+                    status=response.status_code,
+                    content_type=response.headers.get('content-type', 'text/event-stream'))
+
+@app.route('/')
+def index():
+    """Serve the chat UI."""
+    return render_template('index.html')
+
+@app.route('/static/<path:filename>')
+def serve_static(filename):
+    """Serve a file from the static folder."""
+    return app.send_static_file(filename)
+
+if __name__ == '__main__':
+    app.run(debug=False, port=5000)
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..f1c3af8
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: "3.8"
+
+services:
+  web:
+    build: .
+    ports:
+      - "5000:5000"
+    environment:
+      - FLASK_APP=app.py
+      - TOKENIZER=gpt2
+      - API_URL=https://api.openai.com/v1
+      - API_KEY=your_api_key_here
+      - API_MODEL=gpt-3.5-turbo
+      - TEMPERATURE=0
+      - RATE_LIMIT=20/minute
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f85a2f1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+Flask==2.1.0
+transformers==4.28.1
+requests==2.26.0
+Flask-Limiter==3.1.0
+gunicorn==20.1.0
diff --git a/static/css/common.css b/static/css/common.css
new file mode 100644
index 0000000..f3a1afc
--- /dev/null
+++ b/static/css/common.css
@@ -0,0 +1,172 @@
+/* make it responsive */
+@media (min-width: 852px) {
+  body {
+    font-size: 14px;
+  }
+}
+@media (max-width: 852px) {
+  body {
+    font-size: 12px;
+  }
+}
+
+/* resets */
+html,
+body {
+  width: 100%;
+  height: 100%;
+}
+
+*::-webkit-scrollbar {
+  display: none;
+}
+
+* {
+  -ms-overflow-style: none;
+  scrollbar-width: none;
+}
+
+* {
+  -moz-box-sizing: border-box;
+  -webkit-box-sizing: border-box;
+  box-sizing: border-box;
+}
+
+/* default */
+body {
+  margin: 0;
+  background-color: var(--primary-bg-color);
+  color: var(--foreground-color);
+}
+
+h1,
+h2,
+h3,
+h4,
+h5,
+h6 {
+  margin: 0em;
+}
+
+hr {
+  width: 92%;
+}
+
+button {
+  cursor: pointer;
+  border: none;
+  background-color: transparent;
+}
+button:hover {
+}
+button:active {
+}
+
+/* components */
+.container {
+  margin: 0 auto;
+  padding: 1rem;
+}
+
+.centered {
+  display: flex;
flex-direction: column; + justify-content: center; + align-items: center; +} + +.centered-w-only { + position: absolute; + left: 50%; + transform: translateX(-50%); +} + +.centered-h-only { + position: absolute; + top: 50%; + transform: translateY(-50%); +} + +.card { + padding: 0; +} + +.card-header { + padding: 0.5rem 1rem; +} + +.card-container { + width: 96vw; + height: 100%; + gap: 1rem; + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: center; + align-items: center; +} + +.clean-a { + text-decoration: underline; + text-decoration-color: #006fc1; + text-decoration-thickness: 2px; + color: inherit; +} + +.hover-underline { + text-decoration: underline; + text-decoration-color: #228039; + text-decoration-thickness: 2px; + color: inherit; +} + +.flex-horizontal { + display: flex; + flex-direction: row; + justify-content: space-between; + align-items: center; +} + +.vertical-separator { + padding: 0 0.5rem; +} + +[x-cloak] { + display: none !important; +} + +pre { + background-color: var(--tertiary-bg-color); + padding: 1rem; + border-radius: 10px; + overflow-x: auto; + position: relative; +} + +code { + font-family: monospace; + font-size: 0.9em; +} + +:not(pre) > code { + background-color: var(--tertiary-bg-color); + padding: 0.2em 0.4em; + border-radius: 3px; +} + +.clipboard-button { + position: absolute; + top: 0.5rem; + right: 0.5rem; + padding: 0.5rem; + background-color: var(--secondary-color); + color: var(--foreground-color); + border: none; + border-radius: 5px; + cursor: pointer; + transition: 0.2s; +} + +.clipboard-button:hover { + background-color: var(--primary-color); +} diff --git a/static/css/index.css b/static/css/index.css new file mode 100644 index 0000000..fe17994 --- /dev/null +++ b/static/css/index.css @@ -0,0 +1,368 @@ +:root { + --primary-color: #ff71ce; + --primary-color-transparent: #ff71ce66; + --secondary-color: #01cdfe; + --secondary-color-transparent: #01cdfe66; + + --red-color: #ff71ce; + --green-color: 
#05ffa1; + --silver-color: #b967ff; +} + +@media (prefers-color-scheme: light) { + :root { + --primary-bg-color: #fffcf9; + --secondary-bg-color: #ffd1dc; + --tertiary-bg-color: #fbcff3; + --foreground-color: #2d0036; + --accent-color: #b967ff; + } +} + +@media (prefers-color-scheme: dark) { + :root { + --primary-bg-color: #16001e; + --secondary-bg-color: #2d0036; + --tertiary-bg-color: #3d004d; + --foreground-color: #fffcf9; + --accent-color: #05ffa1; + } +} + +main { + width: 100%; + height: 100%; + display: flex; + flex-direction: column; + place-items: center; + background: linear-gradient( + 45deg, + var(--primary-bg-color), + var(--secondary-bg-color) + ); +} + +.home { + width: 100%; + height: 90%; + margin-bottom: 10rem; +} + +.title { + font-size: 3rem; + margin: 1rem 0; + margin-top: 3rem; + color: var(--primary-color); + text-shadow: 2px 2px var(--secondary-color); +} + +.histories-container-container { + width: 100%; + max-height: 75%; + position: relative; +} + +.histories-container { + overflow-y: auto; + overflow-x: hidden; + width: 100%; + height: 100%; + display: flex; + flex-direction: column; + gap: 1rem; + align-items: center; + margin: 0; + padding: 3rem 1rem; +} + +.histories-start, +.histories-end { + height: 3rem; + width: 100%; + z-index: 999; + position: absolute; +} + +.histories-start { + top: 0; + background: linear-gradient( + 180deg, + var(--primary-bg-color) 0%, + transparent 100% + ); +} + +.histories-end { + bottom: 0; + background: linear-gradient( + 0deg, + var(--primary-bg-color) 0%, + transparent 100% + ); +} + +.history { + padding: 1rem; + width: 100%; + max-width: 40rem; + background-color: var(--tertiary-bg-color); + border-radius: 10px; + border-left: 2px solid var(--primary-color); + cursor: pointer; + transform: translateX(calc(1px * var(--tx, 0))); + opacity: var(--opacity, 1); + transition: all 0.3s ease; +} + +.history:hover { + background-color: var(--secondary-bg-color); + transform: scale(1.05); +} + 
+.history-delete-button { + position: absolute; + top: 0; + right: 0; + padding: 0.5rem; + margin: 0; + outline: none; + border: none; + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 0 0 0 10px; + cursor: pointer; + transition: 0.2s; +} + +.history-delete-button:hover { + background-color: var(--primary-color); + padding: 0.75rem; +} + +.messages { + overflow-y: auto; + height: 100%; + width: 100%; + display: flex; + flex-direction: column; + gap: 1rem; + align-items: center; + padding-top: 1rem; + padding-bottom: 11rem; + font-size: 0.8rem; +} + +.message { + width: 96%; + max-width: 80rem; + display: grid; + background-color: var(--secondary-bg-color); + padding: 0.5rem 1rem; + border-radius: 10px; + transition: all 0.3s ease; +} + +.message-role-assistant { + border-bottom: 2px solid var(--primary-color); + border-left: 2px solid var(--primary-color); + box-shadow: -10px 10px 20px 2px var(--primary-color-transparent); + margin-right: auto; + margin-left: 2%; +} + +.message-role-user { + border-bottom: 2px solid var(--secondary-color); + border-right: 2px solid var(--secondary-color); + box-shadow: 10px 10px 20px 2px var(--secondary-color-transparent); + margin-left: auto; + margin-right: 2%; +} + +.message > pre { + white-space: pre-wrap; +} + +.hljs { + width: 100%; + position: relative; + border-radius: 10px; + white-space: pre-wrap; + background-color: var(--tertiary-bg-color); +} + +.clipboard-button { + position: absolute; + top: 0; + right: 0; + padding: 0.5rem; + margin: 0; + outline: none; + border: none; + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 0 0 0 10px; + cursor: pointer; + transition: 0.2s; +} + +.clipboard-button:hover { + background-color: var(--primary-color); + padding: 0.75rem; +} + +.input-container { + position: absolute; + bottom: 0; + background: linear-gradient( + 0deg, + var(--primary-bg-color) 55%, + transparent 100% + ); + width: 100%; + 
display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + z-index: 999; +} + +.input-performance { + margin-top: 4rem; + display: flex; + flex-direction: row; + gap: 1rem; +} + +.input-performance-point { + display: flex; + flex-direction: row; + place-items: center; + gap: 0.5rem; +} + +.input-performance-point > p { + height: 1rem; + line-height: normal; +} + +.input { + width: 90%; + min-height: 3rem; + flex-shrink: 0; + display: flex; + flex-direction: row; + justify-content: center; + gap: 0.5rem; + align-items: flex-end; + margin-bottom: 2rem; +} + +.input-form { + width: 100%; + padding: 1rem; + min-height: 3rem; + max-height: 8rem; + background-color: var(--tertiary-bg-color); + color: var(--foreground-color); + border-radius: 10px; + border: none; + resize: none; + outline: none; + transition: all 0.3s ease; +} + +.input-form:focus { + box-shadow: 0 0 10px var(--accent-color); +} + +.input-button { + height: 3rem; + width: 4rem; + background-color: var(--secondary-color); + color: var(--foreground-color); + border-radius: 10px; + padding: 0.5rem; + cursor: pointer; + transition: all 0.3s ease; +} + +.input-button:hover { + background-color: var(--primary-color); + transform: scale(1.1); +} + +.input-button:disabled { + background-color: var(--secondary-bg-color); + cursor: not-allowed; +} + +p { + white-space: pre-wrap; +} + +.megrim-regular { + font-family: "Megrim", system-ui; + font-weight: 400; + font-style: normal; +} + +.monospace { + font-family: monospace; +} +.new-chat-button { + position: fixed; + top: 1rem; + right: 1rem; + width: 3rem; + height: 3rem; + background-color: var(--accent-color); + color: var(--foreground-color); + border: none; + border-radius: 1.5rem; + cursor: pointer; + font-size: 1rem; + display: flex; + align-items: center; + justify-content: center; + transition: all 0.3s ease; + z-index: 1000; + overflow: hidden; + padding: 0; +} + +.new-chat-button i { + font-size: 1.2rem; + display: flex; 
+  justify-content: center;
+  align-items: center;
+  transition: transform 0.3s ease;
+  width: 3rem;
+  height: 3rem;
+}
+
+.new-chat-text {
+  max-width: 0;
+  opacity: 0;
+  white-space: nowrap;
+  transition:
+    max-width 0.3s ease,
+    opacity 0.3s ease,
+    margin-left 0.3s ease;
+}
+
+.new-chat-button:hover {
+  background-color: var(--primary-color);
+  width: auto;
+  padding-right: 1.5rem;
+}
+
+.new-chat-button:hover .new-chat-text {
+  max-width: 100px;
+  opacity: 1;
+  margin-left: 0.5rem;
+}
+
+.new-chat-button:hover i {
+  transform: rotate(90deg);
+  width: 2rem;
+}
diff --git a/static/favicon.ico b/static/favicon.ico
new file mode 100644
index 0000000..7e37f57
Binary files /dev/null and b/static/favicon.ico differ
diff --git a/static/js/index.js b/static/js/index.js
new file mode 100644
index 0000000..df91971
--- /dev/null
+++ b/static/js/index.js
@@ -0,0 +1,181 @@
+document.addEventListener("alpine:init", () => {
+  Alpine.data("state", () => ({
+    // current state
+    cstate: {
+      time: null,
+      messages: [],
+    },
+
+    // historical state
+    histories: JSON.parse(localStorage.getItem("histories")) || [],
+
+    home: 0,
+    generating: false,
+    endpoint: window.location.origin + "/v1",
+    model: "llama3-8b-8192", // This doesn't matter anymore as the backend handles it now
+    stopToken: "<|eot_id|>", // We may need this for some models
+
+    // performance tracking
+    time_till_first: 0,
+    tokens_per_second: 0,
+    total_tokens: 0,
+
+    removeHistory(cstate) {
+      const index = this.histories.findIndex((state) => {
+        return state.time === cstate.time;
+      });
+      if (index !== -1) {
+        this.histories.splice(index, 1);
+        localStorage.setItem("histories", JSON.stringify(this.histories));
+      }
+    },
+
+    async handleSend() {
+      const el = document.getElementById("input-form");
+      const value = el.value.trim();
+      if (!value) return;
+
+      if (this.generating) return;
+      this.generating = true;
+      if (this.home === 0) this.home = 1;
+
+      // ensure that going back in history will go back to home
+      window.history.pushState({}, "", "/");
+
+      // add message to list
+      this.cstate.messages.push({ role: "user", content: value });
+
+      // clear textarea
+      el.value = "";
+      el.style.height = "auto";
+      el.style.height = el.scrollHeight + "px";
+
+      // reset performance tracking
+      const prefill_start = Date.now();
+      let start_time = 0;
+      let tokens = 0;
+      this.tokens_per_second = 0;
+
+      try {
+        // start receiving server sent events
+        let gottenFirstChunk = false;
+        for await (const chunk of this.openaiChatCompletion(
+          this.cstate.messages,
+        )) {
+          if (!gottenFirstChunk) {
+            this.cstate.messages.push({ role: "assistant", content: "" });
+            gottenFirstChunk = true;
+          }
+
+          // add chunk to the last message
+          this.cstate.messages[this.cstate.messages.length - 1].content += chunk;
+
+          // calculate performance tracking
+          tokens += 1;
+          this.total_tokens += 1;
+          if (start_time === 0) {
+            start_time = Date.now();
+            this.time_till_first = start_time - prefill_start;
+          } else {
+            const diff = Date.now() - start_time;
+            if (diff > 0) {
+              this.tokens_per_second = tokens / (diff / 1000);
+            }
+          }
+        }
+
+        // update the state in histories or add it if it doesn't exist
+        const index = this.histories.findIndex((cstate) => {
+          return cstate.time === this.cstate.time;
+        });
+        this.cstate.time = Date.now();
+        if (index !== -1) {
+          // update the time
+          this.histories[index] = this.cstate;
+        } else {
+          this.histories.push(this.cstate);
+        }
+        // update in local storage
+        localStorage.setItem("histories", JSON.stringify(this.histories));
+      } catch (error) {
+        console.error("Chat completion failed:", error);
+      } finally {
+        // always unlock the input, otherwise a failed request leaves
+        // `generating` stuck at true and nothing can be sent again
+        this.generating = false;
+      }
+    },
+
+    async handleEnter(event) {
+      // if shift is not pressed
+      if (!event.shiftKey) {
+        event.preventDefault();
+        await this.handleSend();
+      }
+    },
+
+    updateTotalTokens(messages) {
+      fetch(`${window.location.origin}/v1/tokenizer/count`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ messages }),
+      })
+        .then((response) => response.json())
+        .then((data) => {
+          this.total_tokens = data.token_count;
+        })
+        .catch(console.error);
+    },
+
+    async *openaiChatCompletion(messages) {
+      // stream response
+      const response = await fetch(`${this.endpoint}/chat/completions`, {
+        method: "POST",
+        headers: {
+          // no Authorization header: the backend proxy adds the API key
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model: this.model,
+          messages: messages,
+          stream: true,
+          stop: [this.stopToken],
+        }),
+      });
+      if (!response.ok) {
+        throw new Error("Failed to fetch");
+      }
+
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder("utf-8");
+      let buffer = "";
+
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) {
+          break;
+        }
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split("\n");
+        buffer = lines.pop();
+
+        for (const line of lines) {
+          if (line.startsWith("data: ")) {
+            const data = line.slice(6);
+            if (data === "[DONE]") {
+              return;
+            }
+            try {
+              const json = JSON.parse(data);
+              if (json.choices && json.choices[0].delta.content) {
+                yield json.choices[0].delta.content;
+              }
+            } catch (error) {
+              console.error("Error parsing JSON:", error);
+            }
+          }
+        }
+      }
+    },
+  }));
+});
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..776b08c
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,178 @@
+
+
+
+ cchat
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+

cchat

+
+ +
+ +
+ +
+
+
+
+
+ +

+

TTFT

+
+ +

+

TOKENS/SEC

+
+ +

+

TOKENS

+
+
+
+ + +
+
+
+ + +