
Commit 1c57f8d

feat(sycl): Add support for Intel GPUs with sycl (#1647) (#1660)
* feat(sycl): Add sycl support (#1647)

* onekit: install without prompts

* set cmake args only in grpc-server

Signed-off-by: Ettore Di Giacinto <[email protected]>

* cleanup

* fixup sycl source env

* Cleanup docs

* ci: runs on self-hosted

* fix typo

* bump llama.cpp

* llama.cpp: update server

* adapt to upstream changes

* adapt to upstream changes

* docs: add sycl

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 16cebf0 commit 1c57f8d

13 files changed: +917 additions, −755 deletions

.github/workflows/image-pr.yml

Lines changed: 7 additions & 0 deletions
@@ -75,6 +75,13 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: 'sycl-f16-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"

.github/workflows/image.yml

Lines changed: 28 additions & 0 deletions
@@ -122,6 +122,34 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: 'sycl-f16-core'
+            ffmpeg: 'false'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f32'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: 'sycl-f32-core'
+            ffmpeg: 'false'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: 'sycl-f16-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+          - build-type: 'sycl_f32'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: 'sycl-f32-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
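The new matrix entries only extend the published tag set with SYCL variants. As a rough, hedged sketch of how one of the resulting images could be used (the registry path, tag prefix, and device flags below are assumptions for illustration, not defined by this change), an Intel GPU is typically exposed to the container through the DRI render nodes:

    # hypothetical image reference; adjust registry/repository/tag prefix to what CI actually publishes
    docker pull quay.io/go-skynet/local-ai:master-sycl-f16-ffmpeg-core

    # pass the Intel GPU into the container via /dev/dri and expose the API port
    docker run -p 8080:8080 --device /dev/dri \
      quay.io/go-skynet/local-ai:master-sycl-f16-ffmpeg-core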

Dockerfile

Lines changed: 8 additions & 1 deletion
@@ -2,7 +2,6 @@ ARG GO_VERSION=1.21-bullseye
 ARG IMAGE_TYPE=extras
 # extras or core
 
-
 FROM golang:$GO_VERSION as requirements-core
 
 ARG BUILD_TYPE
@@ -38,6 +37,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     apt-get update && \
     apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
+
+# oneapi requirements
+RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
+    wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
+    sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
+    rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
+    ; fi
+
 ENV PATH /usr/local/cuda/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
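Because the oneAPI Base Kit is installed only when BUILD_TYPE matches a SYCL variant, a SYCL-enabled core image can be produced from the same Dockerfile. A minimal sketch, assuming the repository root as build context and an image tag chosen here purely for illustration:

    # build the core image with the SYCL FP16 backend baked in
    docker build \
      --build-arg BUILD_TYPE=sycl_f16 \
      --build-arg IMAGE_TYPE=core \
      -t local-ai:sycl-f16-core .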

Makefile

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
-CPPLLAMA_VERSION?=e0085fdf7c758f0bc2746fc106fb29dd9df959de
+CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

backend/cpp/llama/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ add_library(hw_grpc_proto
   ${hw_proto_srcs}
   ${hw_proto_hdrs} )
 
-add_executable(${TARGET} grpc-server.cpp json.hpp )
+add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
 target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
   absl::flags_parse
   gRPC::${_REFLECTION}

backend/cpp/llama/Makefile

Lines changed: 15 additions & 0 deletions
@@ -3,6 +3,7 @@ LLAMA_VERSION?=
 
 CMAKE_ARGS?=
 BUILD_TYPE?=
+ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 
 # If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
@@ -19,6 +20,14 @@ else ifeq ($(BUILD_TYPE),hipblas)
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
 endif
 
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f32)
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+endif
+
 llama.cpp:
 	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
 	if [ -z "$(LLAMA_VERSION)" ]; then \
@@ -31,6 +40,7 @@ llama.cpp/examples/grpc-server:
 	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
 	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
 	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
 	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
 	## XXX: In some versions of CMake clip wasn't being built before llama.
 	## This is an hack for now, but it should be fixed in the future.
@@ -49,5 +59,10 @@ clean:
 	rm -rf grpc-server
 
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	bash -c "source $(ONEAPI_VARS); \
+		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
+else
 	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
+endif
 	cp llama.cpp/build/bin/grpc-server .
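With these additions the SYCL CMake flags and the oneAPI environment are derived entirely from BUILD_TYPE and ONEAPI_VARS. A hedged sketch of invoking the target directly, assuming the oneAPI Base Toolkit is already installed at its default location and that a standalone build from this directory is acceptable:

    # from backend/cpp/llama: sources /opt/intel/oneapi/setvars.sh before running cmake
    BUILD_TYPE=sycl_f16 make grpc-server

    # FP32 build, pointing ONEAPI_VARS at a non-default oneAPI install
    BUILD_TYPE=sycl_f32 ONEAPI_VARS=/custom/oneapi/setvars.sh make grpc-server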
