Commit 9e3c483

Merge pull request #16 from apicalshark/temp

Temp

2 parents c9f3add + c0d480a, commit 9e3c483

62 files changed: +30,973 additions, −2,573 deletions

.editorconfig

Lines changed: 10 additions & 0 deletions

@@ -24,6 +24,16 @@ insert_final_newline = unset
 [examples/server/public/*]
 indent_size = 2
 
+[examples/server/public/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
+[examples/server/deps_*]
+trim_trailing_whitespace = unset
+indent_style = unset
+indent_size = unset
+
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
 

.github/workflows/build.yml

Lines changed: 9 additions & 0 deletions

@@ -63,6 +63,14 @@ env:
 
 jobs:
 
+  # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
+  # how to debug it.
+  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
+
+  # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
+  # how to debug it.
+  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
+  # would be great if we fix these
 
   # CUDA Release
 
@@ -232,6 +240,7 @@ jobs:
 
   release:
     permissions: write-all
+
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
    runs-on: ubuntu-latest

Makefile

Lines changed: 4 additions & 13 deletions

@@ -1455,22 +1455,13 @@ llama-server: \
  examples/server/server.cpp \
  examples/server/utils.hpp \
  examples/server/httplib.h \
- examples/server/colorthemes.css.hpp \
- examples/server/style.css.hpp \
- examples/server/theme-beeninorder.css.hpp \
- examples/server/theme-ketivah.css.hpp \
- examples/server/theme-mangotango.css.hpp \
- examples/server/theme-playground.css.hpp \
- examples/server/theme-polarnight.css.hpp \
- examples/server/theme-snowstorm.css.hpp \
  examples/server/index.html.hpp \
- examples/server/index-new.html.hpp \
- examples/server/index.js.hpp \
  examples/server/completion.js.hpp \
- examples/server/system-prompts.js.hpp \
- examples/server/prompt-formats.js.hpp \
- examples/server/json-schema-to-grammar.mjs.hpp \
  examples/server/loading.html.hpp \
+ examples/server/deps_daisyui.min.css.hpp \
+ examples/server/deps_markdown-it.js.hpp \
+ examples/server/deps_tailwindcss.js.hpp \
+ examples/server/deps_vue.esm-browser.js.hpp \
  common/json.hpp \
  common/stb_image.h \
  $(OBJ_ALL)

convert_hf_to_gguf.py

Lines changed: 0 additions & 6 deletions

@@ -3748,10 +3748,7 @@ def __init__(self, *args, **kwargs):
 
         # Embeddings scale
         self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
         if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True # Hack (?)
            self.embeddings_scale = self.hparams['mup_embeddings_scale']
         elif 'embeddings_scale' in self.hparams:
            self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3808,10 +3805,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
             tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
         elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
             tensors.append((new_name, data_torch * self.width_scale))
         else:
             tensors.append((new_name, data_torch))

docs/backend/SYCL.md

Lines changed: 1 addition & 1 deletion

@@ -377,7 +377,7 @@ found 2 SYCL devices:
 
 |Chosen Device ID|Setting|
 |-|-|
-|0|`export ONEAPI_DEVICE_SELECTOR="level_zero:1"` or no action|
+|0|`export ONEAPI_DEVICE_SELECTOR="level_zero:0"` or no action|
 |1|`export ONEAPI_DEVICE_SELECTOR="level_zero:1"`|
 |0 & 1|`export ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1"`|
 
examples/server/CMakeLists.txt

Lines changed: 4 additions & 13 deletions

@@ -15,22 +15,13 @@ set(TARGET_SRCS
     httplib.h
 )
 set(PUBLIC_ASSETS
-    colorthemes.css
-    style.css
-    theme-beeninorder.css
-    theme-ketivah.css
-    theme-mangotango.css
-    theme-playground.css
-    theme-polarnight.css
-    theme-snowstorm.css
     index.html
-    index-new.html
-    index.js
     completion.js
-    system-prompts.js
-    prompt-formats.js
-    json-schema-to-grammar.mjs
     loading.html
+    deps_daisyui.min.css
+    deps_markdown-it.js
+    deps_tailwindcss.js
+    deps_vue.esm-browser.js
 )
 
 foreach(asset ${PUBLIC_ASSETS})
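
Design note: each entry in `PUBLIC_ASSETS` is picked up by the `foreach` loop that follows, which in llama.cpp's server build turns the asset into a generated `<asset>.hpp` header embedded into the binary; that is presumably why the Makefile hunk above swaps in the matching `examples/server/deps_*.hpp` prerequisites for the same four `deps_*` files.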

examples/server/README.md

Lines changed: 10 additions & 0 deletions

@@ -928,6 +928,16 @@ Apart from error types supported by OAI, we also have custom types that are spec
 }
 ```
 
+### Legacy completion web UI
+
+A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggerganov/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./examples/server/public_legacy`
+
+For example:
+
+```sh
+./llama-server -m my_model.gguf -c 8192 --path ./examples/server/public_legacy
+```
+
 ### Extending or building alternative Web Front End
 
 You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.

examples/server/chat.mjs

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 import * as readline from 'node:readline'
 import { stdin, stdout } from 'node:process'
 import { readFileSync } from 'node:fs'
-import { SchemaConverter } from './public/json-schema-to-grammar.mjs'
+import { SchemaConverter } from './public_legacy/json-schema-to-grammar.mjs'
 
 const args = process.argv.slice(2);
 const grammarJsonSchemaFile = args.find(

examples/server/deps.sh

Lines changed: 17 additions & 2 deletions

@@ -6,5 +6,20 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 PUBLIC=$DIR/public
 
 echo "download js bundle files"
-curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
-echo >> $PUBLIC/index.js # add newline
+
+# Note for contributors: Always pin to a specific version "maj.min.patch" to avoid breaking the CI
+
+curl -L https://cdn.tailwindcss.com/3.4.14 > $PUBLIC/deps_tailwindcss.js
+echo >> $PUBLIC/deps_tailwindcss.js # add newline
+
+curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/styled.min.css > $PUBLIC/deps_daisyui.min.css
+curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/themes.min.css >> $PUBLIC/deps_daisyui.min.css
+echo >> $PUBLIC/deps_daisyui.min.css # add newline
+
+curl -L https://unpkg.com/[email protected]/dist/vue.esm-browser.js > $PUBLIC/deps_vue.esm-browser.js
+echo >> $PUBLIC/deps_vue.esm-browser.js # add newline
+
+curl -L https://cdnjs.cloudflare.com/ajax/libs/markdown-it/13.0.2/markdown-it.js > $PUBLIC/deps_markdown-it.js
+echo >> $PUBLIC/deps_markdown-it.js # add newline
+
+ls -lah $PUBLIC
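
The pattern in this script is deliberate: every third-party bundle is fetched from a version-pinned URL (tailwindcss 3.4.14, daisyui 4.12.14, vue 3.5.12, markdown-it 13.0.2), written to a committed `deps_*` file, and appended with a trailing newline, so builds and CI do not depend on a floating CDN version. The `.editorconfig` exceptions for `deps_*` at the top of this commit fit the same goal, presumably so editors do not reformat the vendored bundles.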

examples/server/public/completion.js

Lines changed: 25 additions & 4 deletions

@@ -1,12 +1,16 @@
 const paramDefaults = {
   stream: true,
-  n_predict: 500,
   temperature: 0.2,
-  stop: ["</s>"]
 };
 
 let generation_settings = null;
 
+export class CompletionError extends Error {
+  constructor(message, name, data) {
+    super(message);
+    this.name = name;
+  }
+};
 
 // Completes the prompt as a generator. Recommended for most use cases.
 //
@@ -29,7 +33,7 @@ export async function* llama(prompt, params = {}, config = {}) {
 
   const completionParams = { ...paramDefaults, ...params, prompt };
 
-  const response = await fetch(`${api_url}/completion`, {
+  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
     method: 'POST',
     body: JSON.stringify(completionParams),
     headers: {
@@ -41,6 +45,18 @@ export async function* llama(prompt, params = {}, config = {}) {
     signal: controller.signal,
   });
 
+  const status = response.status;
+  if (status !== 200) {
+    try {
+      const body = await response.json();
+      if (body && body.error && body.error.message) {
+        throw new CompletionError(body.error.message, 'ServerError');
+      }
+    } catch (err) {
+      throw new CompletionError(err.message, 'ServerError');
+    }
+  }
+
   const reader = response.body.getReader();
   const decoder = new TextDecoder();
 
@@ -78,7 +94,12 @@ export async function* llama(prompt, params = {}, config = {}) {
       for (const line of lines) {
         const match = regex.exec(line);
         if (match) {
-          result[match[1]] = match[2]
+          result[match[1]] = match[2];
+          if (result.data === '[DONE]') {
+            cont = false;
+            break;
+          }
+
           // since we know this is llama.cpp, let's just decode the json in data
           if (result.data) {
             result.data = JSON.parse(result.data);
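
Taken together, these changes make the client endpoint configurable (`config.endpoint`), surface non-200 responses as a `CompletionError`, and stop the stream on the OpenAI-style `[DONE]` sentinel. A minimal consumption sketch, assuming Node 18+ (for global `fetch`), a llama-server on localhost:8080, and the `api_url` config field and `chunk.data.content` shape used elsewhere in completion.js; the prompt and `n_predict` value are illustrative:

```js
import { llama, CompletionError } from './public/completion.js';

try {
  // Stream tokens; the default endpoint is '/completion', and a
  // different path can be supplied via the config's `endpoint` field.
  const chunks = llama(
    'Building a website can be done in 10 simple steps:',
    { n_predict: 64 },                    // generation parameters
    { api_url: 'http://localhost:8080' }  // client config (assumed field)
  );
  for await (const chunk of chunks) {
    process.stdout.write(chunk.data.content ?? '');
  }
} catch (err) {
  if (err instanceof CompletionError) {
    // Raised by the new non-200 handling above.
    console.error(`server error: ${err.message}`);
  } else {
    throw err;
  }
}
```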

examples/server/public/deps_daisyui.min.css

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default.
