
Commit 7d98090

Make API and server compatible with OpenAI API (#1042)
1 parent e3ec7ac commit 7d98090

File tree

3 files changed: +61, -35 lines changed

README.md

Lines changed: 13 additions & 10 deletions
@@ -181,16 +181,6 @@ This mode generates text based on an input prompt.
 python3 torchchat.py generate llama3.1 --prompt "write me a story about a boy and his bear"
 ```
 
-### Browser
-This mode allows you to chat with the model using a UI in your browser
-Running the command automatically open a tab in your browser.
-
-[skip default]: begin
-
-```
-streamlit run torchchat.py -- browser llama3.1
-```
-
 [skip default]: end
 
 ### Server
@@ -252,6 +242,19 @@ curl http://127.0.0.1:5000/v1/chat \
 
 </details>
 
+### Browser
+This command opens a basic browser interface for local chat by querying a local server.
+
+First, follow the steps in the Server section above to start a local server. Then, in another terminal, launch the interface. Running the following will open a tab in your browser.
+
+[skip default]: begin
+
+```
+streamlit run browser/browser.py
+```
+
+Use the "Max Response Tokens" slider to limit the maximum number of tokens generated by the model for each response. Click the "Reset Chat" button to remove the message history and start a fresh chat.
+
 
 ## Desktop/Server Execution
 
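
With the server and browser split this way, the local endpoint speaks the OpenAI chat-completions protocol, so the stock openai Python client can be pointed at it directly. A minimal sketch, assuming the server from the Server section is running on port 5000 and serving the llama3 alias used by browser/browser.py; the API key can be any non-empty string, since the local server ignores it:

```
from openai import OpenAI

# Point the standard OpenAI client at the local torchchat server.
client = OpenAI(
    base_url="http://127.0.0.1:5000/v1",
    api_key="unused",  # required by the client library, ignored by the local server
)

# One-shot, non-streaming chat completion; the model alias is assumed
# to match whatever model the local server was started with.
completion = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "Write me a story about a boy and his bear."}],
    max_tokens=64,
)
print(completion.choices[0].message.content)
```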

browser/browser.py

Lines changed: 48 additions & 22 deletions
@@ -1,40 +1,66 @@
 import streamlit as st
 from openai import OpenAI
+import time  # used by the streaming helper below to report tokens/second
 
+st.title("torchchat")
+
+start_state = [
+    {
+        "role": "system",
+        "content": "You're an assistant. Answer questions directly, be brief, and have fun.",
+    },
+    {"role": "assistant", "content": "How can I help you?"},
+]
+
 with st.sidebar:
-    openai_api_key = st.text_input(
-        "OpenAI API Key", key="chatbot_api_key", type="password"
+    response_max_tokens = st.slider(
+        "Max Response Tokens", min_value=10, max_value=1000, value=250, step=10
     )
-    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
-    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
-    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"
-
-st.title("💬 Chatbot")
+    if st.button("Reset Chat", type="primary"):
+        st.session_state["messages"] = start_state
 
 if "messages" not in st.session_state:
-    st.session_state["messages"] = [
-        {
-            "role": "system",
-            "content": "You're an assistant. Be brief, no yapping. Use as few words as possible to respond to the users' questions.",
-        },
-        {"role": "assistant", "content": "How can I help you?"},
-    ]
+    st.session_state["messages"] = start_state
+
 
 for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])
 
 if prompt := st.chat_input():
     client = OpenAI(
-        # This is the default and can be omitted
         base_url="http://127.0.0.1:5000/v1",
-        api_key="YOURMOTHER",
+        api_key="813",  # The OpenAI API requires an API key, but since we don't consume it, this can be any non-empty string.
     )
 
     st.session_state.messages.append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
-    response = client.chat.completions.create(
-        model="stories15m", messages=st.session_state.messages, max_tokens=64
-    )
-    msg = response.choices[0].message.content
-    st.session_state.messages.append({"role": "assistant", "content": msg})
-    st.chat_message("assistant").write(msg)
+
+    with st.chat_message("assistant"), st.status(
+        "Generating... ", expanded=True
+    ) as status:
+
+        def get_streamed_completion(completion_generator):
+            start = time.time()
+            tokcount = 0
+            for chunk in completion_generator:
+                tokcount += 1
+                yield chunk.choices[0].delta.content
+
+            status.update(
+                label="Done, averaged {:.2f} tokens/second".format(
+                    tokcount / (time.time() - start)
+                ),
+                state="complete",
+            )
+
+        response = st.write_stream(
+            get_streamed_completion(
+                client.chat.completions.create(
+                    model="llama3",
+                    messages=st.session_state.messages,
+                    max_tokens=response_max_tokens,
+                    stream=True,
+                )
+            )
+        )[0]
+
+        st.session_state.messages.append({"role": "assistant", "content": response})
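
The same streaming pattern the UI uses in get_streamed_completion can also be exercised from a plain script, which is handy for checking the server without Streamlit. A rough sketch under the same assumptions as the UI (local server on port 5000, llama3 alias, placeholder API key):

```
import time

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:5000/v1", api_key="unused")

start = time.time()
tokcount = 0
stream = client.chat.completions.create(
    model="llama3",  # assumed to match the model the server was started with
    messages=[{"role": "user", "content": "Tell me a short story about a boy and his bear."}],
    max_tokens=250,
    stream=True,
)
for chunk in stream:
    tokcount += 1
    # Each chunk carries an incremental piece of the assistant message; content may be None.
    print(chunk.choices[0].delta.content or "", end="", flush=True)

print("\nAveraged {:.2f} tokens/second".format(tokcount / (time.time() - start)))
```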

server.py

Lines changed: 0 additions & 3 deletions
@@ -75,9 +75,6 @@ def chunk_processor(chunked_completion_generator):
                 next_tok = ""
             print(next_tok, end="", flush=True)
             yield f"data:{json.dumps(_del_none(asdict(chunk)))}\n\n"
-            # wasda = json.dumps(asdict(chunk))
-            # print(wasda)
-            # yield wasda
 
     resp = Response(
         chunk_processor(gen.chunked_completion(req)),
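
Because chunk_processor frames each chunk as a server-sent-events style `data:` line, the stream can also be read without the OpenAI SDK. A rough sketch, assuming the /v1/chat route shown in the README's curl example accepts an OpenAI-style JSON body (field names beyond choices/delta/content are not confirmed by this diff):

```
import json

import requests  # third-party HTTP client, used here only for illustration

payload = {
    "model": "llama3",  # assumed alias, as in browser/browser.py
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": True,
}

with requests.post("http://127.0.0.1:5000/v1/chat", json=payload, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data:"):
            continue  # skip blank separators between events
        chunk = json.loads(line[len("data:"):])
        # Mirror the chunk shape used by browser.py: choices[0].delta.content holds the new text.
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        print(delta.get("content") or "", end="", flush=True)
print()
```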
