From 0b7cb80a57f5fd4861e7a6df6035af22f39ee977 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Tue, 21 Jan 2025 09:01:01 +0800 Subject: [PATCH 1/3] perf: replace source data struct --- src/replace_source.rs | 155 +++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 92 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index 29eaad34..0f66484d 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -1,14 +1,11 @@ use std::{ borrow::Cow, cell::RefCell, + collections::BTreeSet, hash::{Hash, Hasher}, - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, Mutex, - }, + sync::Arc, }; -use itertools::Itertools; use rustc_hash::FxHashMap as HashMap; use crate::{ @@ -41,10 +38,7 @@ use crate::{ /// ``` pub struct ReplaceSource { inner: Arc, - replacements: Vec, - sorted_index: Mutex>, - /// Whether `replacements` is sorted. - is_sorted: AtomicBool, + replacements: BTreeSet<(Replacement, u32)>, } /// Enforce replacement order when two replacement start and end are both equal @@ -68,6 +62,22 @@ struct Replacement { enforce: ReplacementEnforce, } +impl Ord for Replacement { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + (self.start, self.end, self.enforce).cmp(&( + other.start, + other.end, + other.enforce, + )) + } +} + +impl PartialOrd for Replacement { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + impl Replacement { pub fn new( start: u32, @@ -91,9 +101,7 @@ impl ReplaceSource { pub fn new(source: T) -> Self { Self { inner: Arc::new(source), - replacements: Vec::new(), - sorted_index: Mutex::new(Vec::new()), - is_sorted: AtomicBool::new(true), + replacements: BTreeSet::new(), } } @@ -101,32 +109,6 @@ impl ReplaceSource { pub fn original(&self) -> &T { &self.inner } - - fn sort_replacement(&self) { - if self.is_sorted.load(Ordering::SeqCst) { - return; - } - let sorted_index = self - .replacements - .iter() - .enumerate() - .sorted_by(|(_, a), (_, b)| { - (a.start, a.end, a.enforce).cmp(&(b.start, b.end, b.enforce)) - }) - .map(|replacement| replacement.0) - .collect::>(); - *self.sorted_index.lock().unwrap() = sorted_index; - self.is_sorted.store(true, Ordering::SeqCst) - } - - fn sorted_replacement(&self) -> Vec<&Replacement> { - self.sort_replacement(); - let sorted_index = self.sorted_index.lock().unwrap(); - sorted_index - .iter() - .map(|idx| &self.replacements[*idx]) - .collect() - } } impl ReplaceSource { @@ -154,14 +136,16 @@ impl ReplaceSource { content: &str, name: Option<&str>, ) { - self.replacements.push(Replacement::new( - start, - end, - content.into(), - name.map(|s| s.into()), - ReplacementEnforce::Normal, + self.replacements.insert(( + Replacement::new( + start, + end, + content.into(), + name.map(|s| s.into()), + ReplacementEnforce::Normal, + ), + self.replacements.len() as u32, )); - self.is_sorted.store(false, Ordering::SeqCst); } /// Create a replacement with content at `[start, end)`, with ReplacementEnforce. @@ -173,14 +157,16 @@ impl ReplaceSource { name: Option<&str>, enforce: ReplacementEnforce, ) { - self.replacements.push(Replacement::new( - start, - end, - content.into(), - name.map(|s| s.into()), - enforce, + self.replacements.insert(( + Replacement::new( + start, + end, + content.into(), + name.map(|s| s.into()), + enforce, + ), + self.replacements.len() as u32, )); - self.is_sorted.store(false, Ordering::SeqCst); } } @@ -190,18 +176,18 @@ impl Source for ReplaceSource { // mut_string_push_str is faster that vec join // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs - let replacements = self.sorted_replacement(); - if replacements.is_empty() { + if self.replacements.is_empty() { return inner_source_code; } - let max_len = replacements + let max_len = self + .replacements .iter() - .map(|replacement| replacement.content.len()) + .map(|(replacement, _)| replacement.content.len()) .sum::() + inner_source_code.len(); let mut source_code = String::with_capacity(max_len); let mut inner_pos = 0; - for replacement in replacements.iter() { + for (replacement, _) in self.replacements.iter() { if inner_pos < replacement.start { let end_pos = (replacement.start as usize).min(inner_source_code.len()); source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]); @@ -226,13 +212,12 @@ impl Source for ReplaceSource { // mut_string_push_str is faster that vec join // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs - let replacements = self.sorted_replacement(); - if replacements.is_empty() { + if self.replacements.is_empty() { return inner_source_code; } let mut source_code = Rope::new(); let mut inner_pos = 0; - for replacement in replacements.iter() { + for (replacement, _) in self.replacements.iter() { if inner_pos < replacement.start { let end_pos = (replacement.start as usize).min(inner_source_code.len()); let slice = inner_source_code.byte_slice(inner_pos as usize..end_pos); @@ -288,7 +273,6 @@ impl std::fmt::Debug for ReplaceSource { "replacements", &self.replacements.iter().take(3).collect::>(), ) - .field("is_sorted", &self.is_sorted.load(Ordering::SeqCst)) .finish() } } @@ -330,11 +314,10 @@ impl StreamChunks for ReplaceSource { on_name: crate::helpers::OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { let on_name = RefCell::new(on_name); - let repls = &self.sorted_replacement(); + let mut replacements = self.replacements.iter().map(|(r, _)| r); let mut pos: u32 = 0; - let mut i: usize = 0; let mut replacement_end: Option = None; - let mut next_replacement = (i < repls.len()).then(|| repls[i].start); + let mut next_replacement = replacements.next(); let mut generated_line_offset: i64 = 0; let mut generated_column_offset: i64 = 0; let mut generated_column_offset_line = 0; @@ -449,13 +432,13 @@ impl StreamChunks for ReplaceSource { } // Is a replacement in the chunk? - while let Some(next_replacement_pos) = next_replacement - .filter(|next_replacement_pos| *next_replacement_pos < end_pos) + while let Some(replacement) = + next_replacement.filter(|replacement| replacement.start < end_pos) { let mut line = mapping.generated_line as i64 + generated_line_offset; - if next_replacement_pos > pos { + if replacement.start > pos { // Emit chunk until replacement - let offset = next_replacement_pos - pos; + let offset = replacement.start - pos; let chunk_slice = chunk .byte_slice(chunk_pos as usize..(chunk_pos + offset) as usize); on_chunk( @@ -482,7 +465,7 @@ impl StreamChunks for ReplaceSource { ); mapping.generated_column += offset; chunk_pos += offset; - pos = next_replacement_pos; + pos = replacement.start; if let Some(original) = mapping.original.as_mut().filter(|original| { check_original_content( @@ -497,20 +480,16 @@ impl StreamChunks for ReplaceSource { } } // Insert replacement content split into chunks by lines - #[allow(unsafe_code)] - // SAFETY: The safety of this operation relies on the fact that the `ReplaceSource` type will not delete the `replacements` during its entire lifetime. - let repl = unsafe { - std::mem::transmute::<&Replacement, &'a Replacement>(repls[i]) - }; - let lines = - split_into_lines(&repl.content.as_str()).collect::>(); + split_into_lines(&replacement.content.as_str()).collect::>(); let mut replacement_name_index = mapping .original .as_ref() .and_then(|original| original.name_index); - if let Some(name) = - repl.name.as_ref().filter(|_| mapping.original.is_some()) + if let Some(name) = replacement + .name + .as_ref() + .filter(|_| mapping.original.is_some()) { let mut name_mapping = name_mapping.borrow_mut(); let mut global_index = name_mapping.get(name.as_str()).copied(); @@ -563,18 +542,13 @@ impl StreamChunks for ReplaceSource { // Remove replaced content by settings this variable replacement_end = if let Some(replacement_end) = replacement_end { - Some(replacement_end.max(repl.end)) + Some(replacement_end.max(replacement.end)) } else { - Some(repl.end) + Some(replacement.end) }; // Move to next replacement - i += 1; - next_replacement = if i < repls.len() { - Some(repls[i].start) - } else { - None - }; + next_replacement = replacements.next(); // Skip over when it has been replaced let offset = chunk.len() as i64 - end_pos as i64 @@ -687,9 +661,8 @@ impl StreamChunks for ReplaceSource { // Handle remaining replacements let mut remainder = Rope::new(); - while i < repls.len() { - remainder.add(&repls[i].content); - i += 1; + for replacement in replacements { + remainder.add(&replacement.content); } // Insert remaining replacements content split into chunks by lines @@ -742,8 +715,6 @@ impl Clone for ReplaceSource { Self { inner: self.inner.clone(), replacements: self.replacements.clone(), - sorted_index: Mutex::new(self.sorted_index.lock().unwrap().clone()), - is_sorted: AtomicBool::new(self.is_sorted.load(Ordering::SeqCst)), } } } @@ -751,7 +722,7 @@ impl Clone for ReplaceSource { impl Hash for ReplaceSource { fn hash(&self, state: &mut H) { "ReplaceSource".hash(state); - for repl in self.sorted_replacement() { + for (repl, _) in self.replacements.iter() { repl.hash(state); } self.inner.hash(state); From b45b001b68d43967bd2525d3df0f0498e6098226 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Tue, 21 Jan 2025 09:43:45 +0800 Subject: [PATCH 2/3] refactor: replace_with_enforce --- src/replace_source.rs | 94 +++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index 0f66484d..6687ef4c 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -1,7 +1,6 @@ use std::{ borrow::Cow, cell::RefCell, - collections::BTreeSet, hash::{Hash, Hasher}, sync::Arc, }; @@ -38,7 +37,7 @@ use crate::{ /// ``` pub struct ReplaceSource { inner: Arc, - replacements: BTreeSet<(Replacement, u32)>, + replacements: Vec, } /// Enforce replacement order when two replacement start and end are both equal @@ -60,14 +59,16 @@ struct Replacement { content: String, name: Option, enforce: ReplacementEnforce, + index: u32, } impl Ord for Replacement { fn cmp(&self, other: &Self) -> std::cmp::Ordering { - (self.start, self.end, self.enforce).cmp(&( + (self.start, self.end, self.enforce, self.index).cmp(&( other.start, other.end, other.enforce, + other.index, )) } } @@ -78,30 +79,12 @@ impl PartialOrd for Replacement { } } -impl Replacement { - pub fn new( - start: u32, - end: u32, - content: String, - name: Option, - enforce: ReplacementEnforce, - ) -> Self { - Self { - start, - end, - content, - name, - enforce, - } - } -} - impl ReplaceSource { /// Create a [ReplaceSource]. pub fn new(source: T) -> Self { Self { inner: Arc::new(source), - replacements: BTreeSet::new(), + replacements: Vec::new(), } } @@ -136,16 +119,13 @@ impl ReplaceSource { content: &str, name: Option<&str>, ) { - self.replacements.insert(( - Replacement::new( - start, - end, - content.into(), - name.map(|s| s.into()), - ReplacementEnforce::Normal, - ), - self.replacements.len() as u32, - )); + self.replace_with_enforce( + start, + end, + content, + name, + ReplacementEnforce::Normal, + ); } /// Create a replacement with content at `[start, end)`, with ReplacementEnforce. @@ -157,16 +137,32 @@ impl ReplaceSource { name: Option<&str>, enforce: ReplacementEnforce, ) { - self.replacements.insert(( - Replacement::new( - start, - end, - content.into(), - name.map(|s| s.into()), - enforce, - ), - self.replacements.len() as u32, - )); + let replacement = Replacement { + start, + end, + content: content.into(), + name: name.map(|s| s.into()), + enforce, + index: self.replacements.len() as u32, + }; + + if let Some(last) = self.replacements.last() { + let cmp = replacement.cmp(last); + if cmp == std::cmp::Ordering::Greater || cmp == std::cmp::Ordering::Equal + { + self.replacements.push(replacement); + } else { + let insert_at = match self + .replacements + .binary_search_by(|other| other.cmp(&replacement)) + { + Ok(insert_at) | Err(insert_at) => insert_at, + }; + self.replacements.insert(insert_at, replacement); + } + } else { + self.replacements.push(replacement); + } } } @@ -182,12 +178,12 @@ impl Source for ReplaceSource { let max_len = self .replacements .iter() - .map(|(replacement, _)| replacement.content.len()) + .map(|replacement| replacement.content.len()) .sum::() + inner_source_code.len(); let mut source_code = String::with_capacity(max_len); let mut inner_pos = 0; - for (replacement, _) in self.replacements.iter() { + for replacement in self.replacements.iter() { if inner_pos < replacement.start { let end_pos = (replacement.start as usize).min(inner_source_code.len()); source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]); @@ -217,7 +213,7 @@ impl Source for ReplaceSource { } let mut source_code = Rope::new(); let mut inner_pos = 0; - for (replacement, _) in self.replacements.iter() { + for replacement in self.replacements.iter() { if inner_pos < replacement.start { let end_pos = (replacement.start as usize).min(inner_source_code.len()); let slice = inner_source_code.byte_slice(inner_pos as usize..end_pos); @@ -314,7 +310,7 @@ impl StreamChunks for ReplaceSource { on_name: crate::helpers::OnName<'_, 'a>, ) -> crate::helpers::GeneratedInfo { let on_name = RefCell::new(on_name); - let mut replacements = self.replacements.iter().map(|(r, _)| r); + let mut replacements = self.replacements.iter(); let mut pos: u32 = 0; let mut replacement_end: Option = None; let mut next_replacement = replacements.next(); @@ -722,8 +718,8 @@ impl Clone for ReplaceSource { impl Hash for ReplaceSource { fn hash(&self, state: &mut H) { "ReplaceSource".hash(state); - for (repl, _) in self.replacements.iter() { - repl.hash(state); + for replacement in self.replacements.iter() { + replacement.hash(state); } self.inner.hash(state); } @@ -1137,7 +1133,7 @@ return
{data.foo}
assert_eq!(source.map(&MapOptions::default()), None); let mut hasher = twox_hash::XxHash64::default(); source.hash(&mut hasher); - assert_eq!(format!("{:x}", hasher.finish()), "5781cda25d360a42"); + assert_eq!(format!("{:x}", hasher.finish()), "aec81d0020320dd3"); } #[test] From 974d75c060615e68f60a33349eb10a608cfcfec4 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Tue, 21 Jan 2025 09:54:25 +0800 Subject: [PATCH 3/3] perf: remainder --- src/replace_source.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index 6687ef4c..e5ceb4e8 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -112,6 +112,7 @@ impl ReplaceSource { } /// Create a replacement with content at `[start, end)`. + #[inline] pub fn replace( &mut self, start: u32, @@ -663,10 +664,14 @@ impl StreamChunks for ReplaceSource { // Insert remaining replacements content split into chunks by lines let mut line = result.generated_line as i64 + generated_line_offset; - let matches: Vec = split_into_lines(&remainder).collect(); - for (m, content_line) in matches.iter().enumerate() { + let lines: Vec = split_into_lines(&remainder).collect(); + let lines_len = lines.len(); + for (m, content_line) in lines.into_iter().enumerate() { + let newline = content_line.ends_with("\n"); + let content_line_len = content_line.len(); + on_chunk( - Some(content_line.clone()), + Some(content_line), Mapping { generated_line: line as u32, generated_column: ((result.generated_column as i64) @@ -679,11 +684,11 @@ impl StreamChunks for ReplaceSource { }, ); - if m == matches.len() - 1 && !content_line.ends_with("\n") { + if m == lines_len - 1 && !newline { if generated_column_offset_line == line { - generated_column_offset += content_line.len() as i64; + generated_column_offset += content_line_len as i64; } else { - generated_column_offset = content_line.len() as i64; + generated_column_offset = content_line_len as i64; generated_column_offset_line = line; } } else {