Skip to content

Feat: Add Rust config options to detect threading support in Oxide #18292

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions crates/oxide/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ name = "tailwindcss-oxide"
version = "0.1.0"
edition = "2021"

[lib]
crate-type = ["lib", "cdylib"]

[dependencies]
bstr = "1.11.3"
globwalk = "0.9.1"
log = "0.4.22"
rayon = "1.10.0"
fxhash = { package = "rustc-hash", version = "2.1.1" }
crossbeam = "0.8.4"
tracing = { version = "0.1.40", features = [] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
walkdir = "2.5.0"
Expand All @@ -20,6 +21,16 @@ classification-macros = { path = "../classification-macros" }
ignore = { path = "../ignore" }
regex = "1.11.1"

# Threading dependencies - not available on wasm32-unknown-unknown
[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies]
rayon = "1.10.0"
crossbeam = "0.8.4"

# WASM-specific dependencies
[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
wasm-bindgen = "0.2"
console_error_panic_hook = "0.1"

[dev-dependencies]
tempfile = "3.13.0"
pretty_assertions = "1.4.1"
Expand Down
7 changes: 7 additions & 0 deletions crates/oxide/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,10 @@ pub use glob::GlobEntry;
pub use scanner::sources::PublicSourceEntry;
pub use scanner::ChangedContent;
pub use scanner::Scanner;

// WASM bindings
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
pub mod wasm;

#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
pub use wasm::*;
208 changes: 138 additions & 70 deletions crates/oxide/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,137 @@ use bstr::ByteSlice;
use fast_glob::glob_match;
use fxhash::{FxHashMap, FxHashSet};
use ignore::{gitignore::GitignoreBuilder, WalkBuilder};
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
use rayon::prelude::*;

// Conditional parallel processing helpers
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
mod parallel {
use rayon::prelude::*;
use fxhash::FxHashSet;

pub fn sort_candidates(candidates: &mut Vec<String>) {
candidates.par_sort_unstable();
}

pub fn filter_new_candidates(candidates: Vec<String>, existing: &FxHashSet<String>) -> Vec<String> {
candidates.into_par_iter().filter(|c| !existing.contains(c)).collect()
}

pub fn extend_candidates(target: &mut FxHashSet<String>, new: Vec<String>) {
target.par_extend(new);
}

pub fn map_files(files: &[std::path::PathBuf]) -> Vec<String> {
files.par_iter().filter_map(|x| x.clone().into_os_string().into_string().ok()).collect()
}

pub fn process_changed_content(content: Vec<super::ChangedContent>) -> Vec<Vec<u8>> {
content.into_par_iter().filter_map(super::read_changed_content).collect()
}

pub fn process_extraction_blobs<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
where
H: Fn(crate::extractor::Extractor) -> Vec<crate::extractor::Extracted> + std::marker::Sync,
{
let mut result: Vec<_> = blobs
.par_iter()
.flat_map(|blob| blob.par_split(|x| *x == b'\n'))
.filter_map(|blob| {
if blob.is_empty() { return None; }
let extracted = handle(crate::extractor::Extractor::new(blob));
if extracted.is_empty() { return None; }
Some(fxhash::FxHashSet::from_iter(extracted.into_iter().map(|x| match x {
crate::extractor::Extracted::Candidate(bytes) => bytes,
crate::extractor::Extracted::CssVariable(bytes) => bytes,
})))
})
.reduce(Default::default, |mut a, b| { a.extend(b); a })
.into_iter()
.map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
.collect();
result.par_sort_unstable();
result
}

pub fn extract_with_positions(extracted: Vec<crate::extractor::Extracted>, offset: usize, original_content: &[u8]) -> Vec<(String, usize)> {
extracted.into_par_iter().flat_map(|extracted| match extracted {
crate::extractor::Extracted::Candidate(s) => {
let i = s.as_ptr() as usize - offset;
let original = &original_content[i..i + s.len()];
if original.contains_str("-[]") {
return Some(unsafe { (String::from_utf8_unchecked(original.to_vec()), i) });
}
Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) })
}
_ => None,
}).collect()
}
}

#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
mod parallel {
    use bstr::ByteSlice;
    use fxhash::FxHashSet;

    /// Sorts the candidate list in place (sequential wasm fallback).
    pub fn sort_candidates(candidates: &mut Vec<String>) {
        candidates.sort_unstable();
    }

    /// Returns only the candidates that are not already present in `existing`.
    pub fn filter_new_candidates(
        candidates: Vec<String>,
        existing: &FxHashSet<String>,
    ) -> Vec<String> {
        let mut fresh = Vec::new();
        for candidate in candidates {
            if !existing.contains(&candidate) {
                fresh.push(candidate);
            }
        }
        fresh
    }

    /// Inserts all `new` candidates into `target`.
    pub fn extend_candidates(target: &mut FxHashSet<String>, new: Vec<String>) {
        for candidate in new {
            target.insert(candidate);
        }
    }

    /// Converts paths to `String`s, dropping any path that is not valid UTF-8.
    pub fn map_files(files: &[std::path::PathBuf]) -> Vec<String> {
        let mut names = Vec::with_capacity(files.len());
        for file in files {
            if let Ok(name) = file.clone().into_os_string().into_string() {
                names.push(name);
            }
        }
        names
    }

    /// Reads the raw bytes of every changed-content entry, skipping entries
    /// that cannot be read.
    pub fn process_changed_content(content: Vec<super::ChangedContent>) -> Vec<Vec<u8>> {
        let mut blobs = Vec::new();
        for entry in content {
            if let Some(blob) = super::read_changed_content(entry) {
                blobs.push(blob);
            }
        }
        blobs
    }

    /// Runs `handle` over every newline-separated line of every blob and
    /// returns the deduplicated, sorted list of extracted strings.
    pub fn process_extraction_blobs<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
    where
        H: Fn(crate::extractor::Extractor) -> Vec<crate::extractor::Extracted>,
    {
        // Collect everything into a single set so duplicates across lines and
        // blobs collapse before we allocate strings.
        let mut unique: fxhash::FxHashSet<_> = Default::default();
        for blob in &blobs {
            for line in blob.split(|x| *x == b'\n') {
                if line.is_empty() {
                    continue;
                }
                for item in handle(crate::extractor::Extractor::new(line)) {
                    let bytes = match item {
                        crate::extractor::Extracted::Candidate(bytes) => bytes,
                        crate::extractor::Extracted::CssVariable(bytes) => bytes,
                    };
                    unique.insert(bytes);
                }
            }
        }

        let mut result: Vec<String> = unique
            .into_iter()
            // SAFETY: extraction already validated these byte slices as UTF-8,
            // so skipping re-validation is sound.
            .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
            .collect();
        // Unstable sort is safe: the set above guarantees unique candidates.
        result.sort_unstable();
        result
    }

    /// Maps extracted candidates back to `(string, byte offset)` pairs inside
    /// the original content. `offset` is the base address of the buffer that
    /// the extracted slices point into.
    pub fn extract_with_positions(
        extracted: Vec<crate::extractor::Extracted>,
        offset: usize,
        original_content: &[u8],
    ) -> Vec<(String, usize)> {
        let mut positioned = Vec::new();
        for item in extracted {
            if let crate::extractor::Extracted::Candidate(s) = item {
                let start = s.as_ptr() as usize - offset;
                let original = &original_content[start..start + s.len()];
                // SAFETY (both branches): the bytes were validated as UTF-8
                // during extraction, so re-validation is unnecessary.
                let pair = if original.contains_str("-[]") {
                    unsafe { (String::from_utf8_unchecked(original.to_vec()), start) }
                } else {
                    unsafe { (String::from_utf8_unchecked(s.to_vec()), start) }
                };
                positioned.push(pair);
            }
        }
        positioned
    }
}
use std::collections::{BTreeMap, BTreeSet};
use std::fs::OpenOptions;
use std::io::{self, Write};
Expand Down Expand Up @@ -190,7 +320,7 @@ impl Scanner {

// Make sure we have a sorted list of candidates
let mut candidates = self.candidates.iter().cloned().collect::<Vec<_>>();
candidates.par_sort_unstable();
parallel::sort_candidates(&mut candidates);

// Return all candidates instead of only the new ones
candidates
Expand Down Expand Up @@ -299,15 +429,12 @@ impl Scanner {

// Only compute the new candidates and ignore the ones we already have. This is for
// subsequent calls to prevent serializing the entire set of candidates every time.
let mut new_candidates = new_candidates
.into_par_iter()
.filter(|candidate| !self.candidates.contains(candidate))
.collect::<Vec<_>>();
let mut new_candidates = parallel::filter_new_candidates(new_candidates, &self.candidates);

new_candidates.par_sort_unstable();
parallel::sort_candidates(&mut new_candidates);

// Track new candidates for subsequent calls
self.candidates.par_extend(new_candidates.clone());
parallel::extend_candidates(&mut self.candidates, new_candidates.clone());

new_candidates
}
Expand Down Expand Up @@ -355,10 +482,7 @@ impl Scanner {
pub fn get_files(&mut self) -> Vec<String> {
self.scan_sources();

self.files
.par_iter()
.filter_map(|x| x.clone().into_os_string().into_string().ok())
.collect()
parallel::map_files(&self.files)
}

#[tracing::instrument(skip_all)]
Expand Down Expand Up @@ -433,28 +557,7 @@ impl Scanner {

let mut extractor = Extractor::new(&content[..]);

extractor
.extract()
.into_par_iter()
.flat_map(|extracted| match extracted {
Extracted::Candidate(s) => {
let i = s.as_ptr() as usize - offset;
let original = &original_content[i..i + s.len()];
if original.contains_str("-[]") {
return Some(unsafe {
(String::from_utf8_unchecked(original.to_vec()), i)
});
}

// SAFETY: When we parsed the candidates, we already guaranteed that the byte
// slices are valid, therefore we don't have to re-check here when we want to
// convert it back to a string.
Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) })
}

_ => None,
})
.collect()
parallel::extract_with_positions(extractor.extract(), offset, original_content)
}
}

Expand Down Expand Up @@ -502,10 +605,7 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
changed_content.len()
);

changed_content
.into_par_iter()
.filter_map(read_changed_content)
.collect()
parallel::process_changed_content(changed_content)
}

#[tracing::instrument(skip_all)]
Expand All @@ -525,39 +625,7 @@ fn extract<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
where
H: Fn(Extractor) -> Vec<Extracted> + std::marker::Sync,
{
let mut result: Vec<_> = blobs
.par_iter()
.flat_map(|blob| blob.par_split(|x| *x == b'\n'))
.filter_map(|blob| {
if blob.is_empty() {
return None;
}

let extracted = handle(crate::extractor::Extractor::new(blob));
if extracted.is_empty() {
return None;
}

Some(FxHashSet::from_iter(extracted.into_iter().map(
|x| match x {
Extracted::Candidate(bytes) => bytes,
Extracted::CssVariable(bytes) => bytes,
},
)))
})
.reduce(Default::default, |mut a, b| {
a.extend(b);
a
})
.into_iter()
.map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
.collect();

// SAFETY: Unstable sort is faster and in this scenario it's also safe because we are
// guaranteed to have unique candidates.
result.par_sort_unstable();

result
parallel::process_extraction_blobs(blobs, handle)
}

/// Create a walker for the given sources to detect all the files that we have to scan.
Expand Down
90 changes: 90 additions & 0 deletions crates/oxide/src/wasm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use wasm_bindgen::prelude::*;
use crate::{ChangedContent, Scanner};

// Set panic hook for better error messages
#[wasm_bindgen(start)]
pub fn wasm_init() {
    // Runs once when the wasm module is instantiated; routes panic messages
    // to the browser console instead of an opaque `unreachable` trap.
    console_error_panic_hook::set_once();
}

/// JS-facing wrapper for a piece of changed content to scan.
#[wasm_bindgen]
#[derive(Clone)]
pub struct WasmChangedContent {
    // In-memory contents; `None` would mean file-based content, which the
    // `From` conversion in this file rejects with a panic in browser WASM.
    content: Option<String>,
    // File extension associated with `content` (forwarded to the core
    // `ChangedContent::Content` variant).
    extension: String,
}

#[wasm_bindgen]
impl WasmChangedContent {
#[wasm_bindgen(constructor)]
pub fn new(content: Option<String>, extension: String) -> WasmChangedContent {
WasmChangedContent { content, extension }
}

#[wasm_bindgen(getter)]
pub fn content(&self) -> Option<String> {
self.content.clone()
}

#[wasm_bindgen(getter)]
pub fn extension(&self) -> String {
self.extension.clone()
}
}

/// JS-facing pair of an extracted candidate and its byte offset within the
/// scanned content.
#[wasm_bindgen]
#[derive(Clone)]
pub struct WasmCandidateWithPosition {
    // The extracted candidate string.
    candidate: String,
    // Byte offset of the candidate within the scanned content.
    position: usize,
}

#[wasm_bindgen]
impl WasmCandidateWithPosition {
    /// The extracted candidate string.
    #[wasm_bindgen(getter)]
    pub fn candidate(&self) -> String {
        self.candidate.to_owned()
    }

    /// Byte offset of the candidate within the scanned content.
    #[wasm_bindgen(getter)]
    pub fn position(&self) -> usize {
        self.position
    }
}

/// Converts the wasm-facing wrapper into the core [`ChangedContent`].
///
/// # Panics
///
/// Panics when `content` is `None`: a file-based entry would require
/// filesystem access, which browser WASM does not have.
impl From<WasmChangedContent> for ChangedContent {
    fn from(wasm_content: WasmChangedContent) -> Self {
        match wasm_content.content {
            Some(content) => ChangedContent::Content(content, wasm_content.extension),
            None => panic!("File-based content not supported in browser WASM"),
        }
    }
}

/// WASM-exported wrapper around the core [`Scanner`].
#[wasm_bindgen]
pub struct WasmScanner {
    // The underlying scanner, constructed with no sources (no filesystem in
    // the browser); content is pushed in explicitly per call.
    scanner: Scanner,
}

#[wasm_bindgen]
impl WasmScanner {
    /// Creates a scanner with no sources: there is no filesystem to walk in
    /// the browser, so all content is supplied explicitly per call.
    #[wasm_bindgen(constructor)]
    pub fn new() -> WasmScanner {
        WasmScanner {
            scanner: Scanner::new(vec![]),
        }
    }

    /// Extracts candidates and their byte positions from a single piece of
    /// in-memory content.
    ///
    /// # Panics
    ///
    /// Panics if `content` carries no in-memory contents — the
    /// `From<WasmChangedContent>` conversion rejects file-based entries in
    /// browser WASM.
    #[wasm_bindgen(js_name = getCandidatesWithPositions)]
    pub fn get_candidates_with_positions(
        &mut self,
        content: WasmChangedContent,
    ) -> Vec<WasmCandidateWithPosition> {
        let changed_content: ChangedContent = content.into();
        self.scanner
            .get_candidates_with_positions(changed_content)
            .into_iter()
            .map(|(candidate, position)| WasmCandidateWithPosition { candidate, position })
            .collect()
    }
}

// Clippy `new_without_default`: give plain Rust callers the conventional
// construction path as well; `new` stays the wasm-bindgen constructor.
impl Default for WasmScanner {
    fn default() -> Self {
        Self::new()
    }
}