Skip to content

Server UI improvements #4236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 42 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
2566e53
ic
Nov 27, 2023
4d15130
add start script
Nov 27, 2023
9abb310
Update index.html
mounta11n Nov 27, 2023
e7cfe1f
add favicon
Nov 27, 2023
ca22eb6
Merge pull request #1 from mounta11n/server-ui-improvements
mounta11n Nov 27, 2023
a28935f
Update README.md
mounta11n Nov 27, 2023
1bc9ca6
add ui and tui pics
Nov 27, 2023
25ed0c4
add ui and tui pics
Nov 27, 2023
1bb2df7
Update README.md
mounta11n Nov 27, 2023
49d7c07
Update README.md
mounta11n Nov 27, 2023
cf8cb0d
fix multi-modal-selection
Nov 27, 2023
09e3b50
fix wrong formattings
Nov 27, 2023
d568327
fix wrong translation
Nov 27, 2023
c161ad2
add mmproj function
Nov 27, 2023
809b269
Merge branch 'ggerganov:master' into master
mounta11n Nov 27, 2023
d55b482
Merge pull request #2 from mounta11n/server-ui-improvements
mounta11n Nov 27, 2023
464f073
add min-p
Nov 27, 2023
c48f3f2
Merge pull request #3 from mounta11n/server-ui-improvements
mounta11n Nov 27, 2023
082b335
Update README.md
mounta11n Nov 27, 2023
ecb3973
add min-p image
Nov 27, 2023
6c318b5
Update README.md
mounta11n Nov 27, 2023
ae096d0
Merge branch 'ggerganov:master' into master
mounta11n Nov 27, 2023
1b6d422
add start scripts to root path
Nov 27, 2023
4fa32ad
update
Nov 27, 2023
e056b06
error handling for missing dialog
Nov 27, 2023
57f8edd
fix start-server.sh
Nov 28, 2023
9dcb514
update start server scripts
Nov 28, 2023
2e4c05e
Update promptFormats.js
mounta11n Nov 28, 2023
116fc90
Update promptFormats.js
mounta11n Nov 28, 2023
5ac0f30
improve error handling
Nov 28, 2023
c96c458
Merge branch 'ggerganov:master' into master
mounta11n Nov 28, 2023
3a15b28
fix typo
mounta11n Nov 28, 2023
c8d847d
Merge branch 'master' into server-ui-improvements
mounta11n Nov 28, 2023
45f0415
Update README.md
mounta11n Dec 1, 2023
97f4ec4
Merge branch 'ggerganov:master' into server-ui-improvements
mounta11n Dec 1, 2023
fe6f5e1
add dark and light color themes
mounta11n Dec 1, 2023
b564660
Merge remote-tracking branch
mounta11n Dec 5, 2023
87fda0d
debugging
mounta11n Dec 5, 2023
4f88c2c
fix mistral prompt
mounta11n Dec 19, 2023
98366a4
sync gitignore
mounta11n Dec 20, 2023
2017112
clean up & split PRs
mounta11n Dec 21, 2023
4a8fd55
add tinyllama chat prompt
mounta11n Dec 21, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions .clang-tidy

This file was deleted.

6 changes: 6 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ indent_size = 4
[Makefile]
indent_style = tab

[scripts/*.mk]
indent_style = tab

[prompts/*.txt]
insert_final_newline = unset

[examples/server/public/*]
indent_size = 2

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab
15 changes: 11 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ jobs:
cd build
ctest --verbose

# TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
macOS-latest-make:
runs-on: macos-latest

Expand All @@ -160,14 +163,18 @@ jobs:
- name: Build
id: make_build
run: |
make -j $(sysctl -n hw.logicalcpu)
LLAMA_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)

- name: Test
id: make_test
run: |
make tests -j $(sysctl -n hw.logicalcpu)
make test -j $(sysctl -n hw.logicalcpu)
LLAMA_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
LLAMA_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)

# TODO: build with LLAMA_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
# would be great if we fix these
macOS-latest-cmake:
runs-on: macos-latest

Expand All @@ -188,7 +195,7 @@ jobs:
sysctl -a
mkdir build
cd build
cmake ..
cmake -DLLAMA_METAL=OFF ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

- name: Test
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,4 @@ poetry.toml
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
/tests/test-backend-ops
152 changes: 104 additions & 48 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging"
option(LLAMA_MPI "llama: use MPI" OFF)
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)

option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
Expand Down Expand Up @@ -291,7 +291,12 @@ if (LLAMA_CUBLAS)
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})

if (LLAMA_STATIC)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
if (WIN32)
# As of 12.3.1 CUDA Tookit for Windows does not offer a static cublas library
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
else ()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
endif()
else()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()
Expand Down Expand Up @@ -397,57 +402,102 @@ if (LLAMA_HIPBLAS)
endif()
endif()

function(get_flags CCID CCVER)
set(C_FLAGS "")
set(CXX_FLAGS "")

if (CCID MATCHES "Clang")
set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi)

if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
set(C_FLAGS ${C_FLAGS} -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)

if (CCVER VERSION_GREATER_EQUAL 7.1.0)
set(CXX_FLAGS ${CXX_FLAGS} -Wno-format-truncation)
endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
set(CXX_FLAGS ${CXX_FLAGS} -Wextra-semi)
endif()
endif()

set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()

if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration)
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
set(host_cxx_flags "")

if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)

if (
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
)
set(c_flags ${c_flags} -Wdouble-promotion)
endif()
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
set(c_flags ${c_flags} -Wdouble-promotion)
set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
set(WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
set(C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
-Werror=implicit-int -Werror=implicit-function-declaration)
set(CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)

if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
endif()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
endif()
endif()
set(C_FLAGS ${WARNING_FLAGS} ${C_FLAGS})
set(CXX_FLAGS ${WARNING_FLAGS} ${CXX_FLAGS})

get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
else()
# todo : msvc
set(C_FLAGS "")
set(CXX_FLAGS "")
endif()
endif()

if (LLAMA_CUBLAS)
set(CUDA_FLAGS ${CXX_FLAGS} -use_fast_math)
if (NOT MSVC)
set(CUDA_FLAGS ${CUDA_FLAGS} -Wno-pedantic)
endif()

set(c_flags ${c_flags} ${warning_flags})
set(cxx_flags ${cxx_flags} ${warning_flags})
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${host_cxx_flags}>")
if (LLAMA_ALL_WARNINGS AND NOT MSVC)
set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
set(NVCC_CMD ${NVCC_CMD} -ccbin ${CMAKE_CUDA_HOST_COMPILER})
endif()

endif()
execute_process(
COMMAND ${NVCC_CMD} -Xcompiler --version
OUTPUT_VARIABLE CUDA_CCFULLVER
ERROR_QUIET
)

if (NOT MSVC)
set(cuda_flags -Wno-pedantic)
endif()
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
if (NOT CUDA_CCFULLVER MATCHES clang)
set(CUDA_CCID "GNU")
execute_process(
COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
OUTPUT_VARIABLE CUDA_CCVER
ERROR_QUIET
)
else()
if (CUDA_CCFULLVER MATCHES Apple)
set(CUDA_CCID "AppleClang")
else()
set(CUDA_CCID "Clang")
endif()
string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
endif()

list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
if (NOT cuda_host_flags STREQUAL "")
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
endif()
message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

get_flags(${CUDA_CCID} ${CUDA_CCVER})
list(JOIN GF_CXX_FLAGS " " CUDA_CXX_FLAGS) # pass host compiler flags as a single argument
if (NOT CUDA_CXX_FLAGS STREQUAL "")
set(CUDA_FLAGS ${CUDA_FLAGS} -Xcompiler ${CUDA_CXX_FLAGS})
endif()
endif()

add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()

if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
Expand All @@ -471,6 +521,7 @@ endif()
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
ERROR_VARIABLE output
OUTPUT_QUIET
)
if (output MATCHES "dyld-1015\.7")
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
Expand Down Expand Up @@ -593,6 +644,11 @@ else()
message(STATUS "Unknown architecture")
endif()

if (MINGW)
# Target Windows 8 for PrefetchVirtualMemory
add_compile_definitions(_WIN32_WINNT=0x602)
endif()

#
# POSIX conformance
#
Expand Down Expand Up @@ -662,11 +718,11 @@ add_library(ggml OBJECT
ggml-backend.h
ggml-quants.c
ggml-quants.h
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
)

target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
Expand Down
Loading