Skip to content

Fixing the static-linking. #547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,22 @@ thiserror = "1.0"
rand = "0.9"
serial_test = "2.0.0"
cudarc = { version = "0.13" , features =["cuda-12020"]}
intel-mkl-src = { version = "0.8" }
candle = { version = "0.8", package = "candle-core" }
candle-nn = { version = "0.8", package = "candle-nn" }
candle-transformers = { version = "0.8", package = "candle-transformers" }
candle-flash-attn = { version = "0.8", package = "candle-flash-attn" }
half = { version = "2.3.1", features = ["num-traits"] }

[patch.crates-io]
cudarc = { git = "https://github.com/Narsil/cudarc" , rev = "1956436aeddea1da04fc3226282bc07c07eeaa35"}
candle = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-core" }
candle-nn = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-nn" }
candle-transformers = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-transformers" }
candle-flash-attn = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-flash-attn" }
cudarc = { git = "https://github.com/Narsil/cudarc" , rev = "18ae111a4e8779c11377636b9cc3379f686e99c6"}
candle = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-core" }
candle-nn = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-nn" }
candle-transformers = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-transformers" }
candle-flash-attn = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-flash-attn" }
# candle = { path = "../candle/candle-core", package = "candle-core" }
# candle-nn = { path = "../candle/candle-nn" }
# candle-flash-attn = { path = "../candle/candle-flash-attn" }

[profile.release]
debug = 0
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ COPY --from=planner /usr/src/recipe.json recipe.json

RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s
cargo chef cook --release --features ort,candle,mkl --no-default-features --recipe-path recipe.json && sccache -s

COPY backends backends
COPY core core
Expand All @@ -56,7 +56,7 @@ FROM builder AS http-builder

RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,http --no-default-features && sccache -s

FROM builder AS grpc-builder

Expand All @@ -70,7 +70,7 @@ COPY proto proto

RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,grpc --no-default-features && sccache -s

FROM debian:bookworm-slim AS base

Expand Down
1 change: 0 additions & 1 deletion backends/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ candle = ["dep:text-embeddings-backend-candle"]
cuda = ["text-embeddings-backend-candle?/cuda"]
metal = ["text-embeddings-backend-candle?/metal"]
mkl = ["text-embeddings-backend-candle?/mkl"]
mkl-dynamic = ["text-embeddings-backend-candle?/mkl-dynamic"]
accelerate = ["text-embeddings-backend-candle?/accelerate"]
flash-attn = ["text-embeddings-backend-candle?/flash-attn"]
flash-attn-v1 = ["text-embeddings-backend-candle?/flash-attn-v1"]
7 changes: 3 additions & 4 deletions backends/candle/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ homepage.workspace = true
[dependencies]
anyhow = { workspace = true }
accelerate-src = { version = "0.3.2", optional = true }
intel-mkl-src = { version = "0.8.1", optional = true }
intel-mkl-src = { workspace = true, optional = true }
candle = { workspace = true }
candle-nn = { workspace = true }
candle-transformers = { workspace = true }
Expand Down Expand Up @@ -40,8 +40,7 @@ anyhow = { version = "1", features = ["backtrace"] }
[features]
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
metal = ["candle/metal", "candle-nn/metal"]
mkl = ["dep:intel-mkl-src", "intel-mkl-src/mkl-static-lp64-iomp", "candle/mkl"]
mkl-dynamic = ["dep:intel-mkl-src", "intel-mkl-src/mkl-dynamic-lp64-iomp", "candle/mkl"]
cuda = ["candle/cuda", "candle-nn/cuda", "dep:candle-cublaslt", "dep:candle-layer-norm", "dep:candle-rotary"]
mkl = ["dep:intel-mkl-src", "candle/_mkl"]
cuda = ["candle/_cuda", "candle-nn/_cuda", "dep:candle-cublaslt", "dep:candle-layer-norm", "dep:candle-rotary"]
flash-attn-v1 = ["dep:candle-flash-attn-v1", "cuda"]
flash-attn = ["dep:candle-flash-attn", "cuda"]
2 changes: 1 addition & 1 deletion backends/candle/src/models/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[cfg(any(feature = "mkl", feature = "mkl-dynamic"))]
#[cfg(feature = "mkl")]
extern crate intel_mkl_src;

#[cfg(feature = "accelerate")]
Expand Down
8 changes: 1 addition & 7 deletions backends/src/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,13 @@ impl fmt::Display for DType {
#[allow(clippy::derivable_impls)]
impl Default for DType {
fn default() -> Self {
#[cfg(any(
feature = "accelerate",
feature = "mkl",
feature = "mkl-dynamic",
feature = "ort"
))]
#[cfg(any(feature = "accelerate", feature = "mkl", feature = "ort"))]
{
DType::Float32
}
#[cfg(not(any(
feature = "accelerate",
feature = "mkl",
feature = "mkl-dynamic",
feature = "ort",
feature = "python"
)))]
Expand Down
2 changes: 1 addition & 1 deletion candle-extensions/candle-cublaslt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ edition = "2021"
description = "CUBLASLt gemm for the candle ML framework."

[dependencies]
candle = { workspace=true, features = ["cuda"]}
candle = { workspace=true, features = ["_cuda"]}
cudarc = { workspace = true, features = [ "cublaslt", "f16" ]}
half = { workspace = true}
54 changes: 41 additions & 13 deletions candle-extensions/candle-flash-attn-v1/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,39 @@
// variable in order to cache the compiled artifacts and avoid recompiling too often.
use anyhow::{Context, Result};
use rayon::prelude::*;
use std::fs;
use std::path::PathBuf;
use std::str::FromStr;

const KERNEL_FILES: [&str; 4] = [
"flash_api.cu",
"fmha_fwd_hdim32.cu",
"fmha_fwd_hdim64.cu",
"fmha_fwd_hdim128.cu",
];
// const KERNEL_FILES: [&str; 4] = [
// "flash_api.cu",
// "fmha_fwd_hdim32.cu",
// "fmha_fwd_hdim64.cu",
// "fmha_fwd_hdim128.cu",
// ];
Comment on lines +10 to +15
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this intended?


/// Walks `dir_path` depth-first and appends the path of every entry that is
/// not a directory to `paths`.
///
/// A sub-directory is descended into as soon as it is encountered, so its
/// contents land in `paths` at the position the sub-directory occupies in the
/// `fs::read_dir` listing. Directory paths themselves are never recorded.
///
/// Takes `&Path` rather than `&PathBuf` so that any borrowed path can be
/// passed; existing `&PathBuf` arguments still work through deref coercion.
///
/// # Errors
///
/// Returns the first `std::io::Error` raised while listing a directory or
/// reading one of its entries. `paths` keeps whatever was collected before
/// the failure.
fn _read_dir_recursively(
    dir_path: &std::path::Path,
    paths: &mut Vec<PathBuf>,
) -> std::io::Result<()> {
    for entry in fs::read_dir(dir_path)? {
        let path = entry?.path();

        if path.is_dir() {
            _read_dir_recursively(&path, paths)?;
        } else {
            paths.push(path);
        }
    }

    Ok(())
}

/// Returns the paths of all non-directory entries found under `dir_path`,
/// including those nested in sub-directories.
///
/// This is the entry point used by the build script; it owns the result
/// vector and delegates the traversal to `_read_dir_recursively`.
///
/// # Errors
///
/// Propagates any `std::io::Error` hit during the traversal (for example when
/// `dir_path` does not exist); no partial result is returned in that case.
fn read_dir_recursively(dir_path: &std::path::Path) -> std::io::Result<Vec<PathBuf>> {
    let mut paths = Vec::new();
    _read_dir_recursively(dir_path, &mut paths)?;
    Ok(paths)
}

fn main() -> Result<()> {
let num_cpus = std::env::var("RAYON_NUM_THREADS").map_or_else(
Expand All @@ -25,12 +49,11 @@ fn main() -> Result<()> {
.unwrap();

println!("cargo:rerun-if-changed=build.rs");
for kernel_file in KERNEL_FILES.iter() {
println!("cargo:rerun-if-changed=kernels/{kernel_file}");

let paths = read_dir_recursively(&PathBuf::from_str("kernels")?)?;
for file in paths.iter() {
println!("cargo:rerun-if-changed={}", file.display());
}
println!("cargo:rerun-if-changed=kernels/**.h");
println!("cargo:rerun-if-changed=kernels/**.cuh");
println!("cargo:rerun-if-changed=kernels/fmha/**.h");
let out_dir = PathBuf::from(std::env::var("OUT_DIR").context("OUT_DIR not set")?);
let build_dir = match std::env::var("CANDLE_FLASH_ATTN_BUILD_DIR") {
Err(_) =>
Expand All @@ -57,12 +80,17 @@ fn main() -> Result<()> {
let out_file = build_dir.join("libflashattentionv1.a");

let kernel_dir = PathBuf::from("kernels");
let cu_files: Vec<_> = KERNEL_FILES
let kernels: Vec<_> = paths
.iter()
.filter(|f| f.extension().map(|ext| ext == "cu").unwrap_or_default())
.collect();
let cu_files: Vec<_> = kernels
.iter()
.map(|f| {
let mut obj_file = out_dir.join(f);
fs::create_dir_all(obj_file.parent().unwrap()).unwrap();
obj_file.set_extension("o");
(kernel_dir.join(f), obj_file)
(f, obj_file)
})
.collect();
let out_modified: Result<_, _> = out_file.metadata().and_then(|m| m.modified());
Expand Down
2 changes: 1 addition & 1 deletion candle-extensions/candle-layer-norm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
description = "Layer Norm layer for the candle ML framework."

[dependencies]
candle = { workspace = true, features = ["cuda"] }
candle = { workspace = true, features = ["_cuda"] }
half = { workspace = true }

[build-dependencies]
Expand Down
4 changes: 3 additions & 1 deletion candle-extensions/candle-layer-norm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ fn main() -> Result<()> {
for kernel_file in KERNEL_FILES.iter() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no read dir here?

println!("cargo:rerun-if-changed=kernels/{kernel_file}");
}
println!("cargo:rerun-if-changed=kernels/**.cu");
println!("cargo:rerun-if-changed=kernels/ln_fwd_kernels.cuh");
println!("cargo:rerun-if-changed=kernels/ln.h");
println!("cargo:rerun-if-changed=kernels/ln_kernel_traits.h");
println!("cargo:rerun-if-changed=kernels/ln_utils.cuh");
println!("cargo:rerun-if-changed=kernels/static_switch.h");
Expand Down Expand Up @@ -176,6 +176,8 @@ fn set_cuda_include_dir() -> Result<()> {
.chain(roots)
.find(|path| path.join("include").join("cuda.h").is_file())
.context("cannot find include/cuda.h")?;
println!("cargo:rustc-link-search={}", root.join("lib").display());
println!("cargo:rustc-link-search={}", root.join("lib64").display());
println!(
"cargo:rustc-env=CUDA_INCLUDE_DIR={}",
root.join("include").display()
Expand Down
2 changes: 1 addition & 1 deletion candle-extensions/candle-rotary/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ license = "MIT OR Apache-2.0"
readme = "README.md"

[dependencies]
candle = { workspace = true, features = ["cuda"]}
candle = { workspace = true, features = ["_cuda"]}
half = { workspace = true }

[build-dependencies]
Expand Down
7 changes: 4 additions & 3 deletions router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ tokio-stream = { version = "0.1.14", optional = true }

# Optional
cudarc = { workspace = true, optional = true }
intel-mkl-src = { workspace = true, optional = true }

# Malloc trim hack for linux
[target.'cfg(target_os = "linux")'.dependencies]
Expand All @@ -78,18 +79,18 @@ vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }
tonic-build = { version = "0.11.0", optional = true }

[features]
default = ["candle", "http"]
default = ["candle", "http", "dynamic-linking"]
http = ["dep:axum", "dep:axum-tracing-opentelemetry", "dep:base64", "dep:tower-http", "dep:utoipa", "dep:utoipa-swagger-ui"]
grpc = ["metrics-exporter-prometheus/http-listener", "dep:prost", "dep:tonic", "dep:tonic-health", "dep:tonic-reflection", "dep:tonic-build", "dep:async-stream", "dep:tokio-stream"]
metal = ["text-embeddings-backend/metal"]
mkl = ["text-embeddings-backend/mkl"]
mkl-dynamic = ["text-embeddings-backend/mkl-dynamic"]
accelerate = ["text-embeddings-backend/accelerate"]
python = ["text-embeddings-backend/python"]
ort = ["text-embeddings-backend/ort"]
candle = ["text-embeddings-backend/candle"]
candle-cuda = ["candle", "text-embeddings-backend/flash-attn"]
candle-cuda-turing = ["candle", "text-embeddings-backend/flash-attn-v1"]
candle-cuda-volta = ["candle", "text-embeddings-backend/cuda"]
static-linking = ["cudarc/static-linking"]
static-linking = ["cudarc?/static-linking", "intel-mkl-src?/mkl-static-lp64-iomp"]
dynamic-linking = ["cudarc?/dynamic-linking", "intel-mkl-src?/mkl-dynamic-lp64-iomp"]
google = []
Loading