Skip to content

Commit a22151d

Browse files
author
bors-servo
authored
Auto merge of #132 - emilio:numeric-faster, r=SimonSapin
Numeric faster. The following commit: * Removes char logic from consume_numeric. * Rearranges exponent parsing to look at one char in the common case (presumably not a big deal). This makes the benchmark added in the first commit go from: test tests::numeric ... bench: 48,221,308 ns/iter (+/- 3,356,881) to: test tests::numeric ... bench: 43,656,802 ns/iter (+/- 1,323,570) on my machine. This is pretty much a micro-optimization, but perhaps it's worth it given how common these values are. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/rust-cssparser/132) <!-- Reviewable:end -->
2 parents 4a84ac1 + 2b6cc13 commit a22151d

File tree

2 files changed

+58
-29
lines changed

2 files changed

+58
-29
lines changed

src/tests.rs

+12
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,18 @@ fn unquoted_url(b: &mut Bencher) {
608608
})
609609
}
610610

611+
612+
/// Benchmark: builds a fresh `Parser` over the input `"10px"` and pulls its
/// first token, one million times per benchmark iteration.
///
/// The token is routed through `test::black_box` so the optimizer cannot
/// discard the parsing work.
#[cfg(feature = "bench")]
#[bench]
fn numeric(b: &mut Bencher) {
    b.iter(|| {
        for _ in 0..1_000_000 {
            let mut parser = Parser::new("10px");
            let first_token = parser.next();
            let _ = test::black_box(first_token);
        }
    })
}
622+
611623
struct JsonParser;
612624

613625
#[test]

src/tokenizer.rs

+46-29
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,24 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> Cow<'a, str> {
750750
)
751751
}
752752

753+
/// Converts a single ASCII hexadecimal digit byte into its numeric value.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F` (case-insensitive) and yields a value
/// in `0..=15`. Every other byte, including all non-ASCII bytes, yields `None`.
fn byte_to_hex_digit(b: u8) -> Option<u32> {
    let value = match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - b'a' + 10,
        b'A'..=b'F' => b - b'A' + 10,
        _ => return None,
    };
    // Widening u8 -> u32 is lossless, so use `From` rather than an `as` cast.
    Some(u32::from(value))
}
763+
764+
/// Converts a single ASCII decimal digit byte (`0`-`9`) into its numeric value.
///
/// Returns `None` for any byte outside that range.
fn byte_to_decimal_digit(b: u8) -> Option<u32> {
    // Bytes below b'0' wrap around to large values, so one comparison
    // covers both ends of the range.
    let offset = b.wrapping_sub(b'0');
    if offset < 10 {
        Some(u32::from(offset))
    } else {
        None
    }
}
753771

754772
fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
755773
// Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
@@ -768,7 +786,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
768786
}
769787

770788
let mut integral_part: f64 = 0.;
771-
while let Some(digit) = tokenizer.next_char().to_digit(10) {
789+
while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
772790
integral_part = integral_part * 10. + digit as f64;
773791
tokenizer.advance(1);
774792
if tokenizer.is_eof() {
@@ -784,7 +802,7 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
784802
is_integer = false;
785803
tokenizer.advance(1); // Consume '.'
786804
let mut factor = 0.1;
787-
while let Some(digit) = tokenizer.next_char().to_digit(10) {
805+
while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
788806
fractional_part += digit as f64 * factor;
789807
factor *= 0.1;
790808
tokenizer.advance(1);
@@ -796,35 +814,34 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
796814

797815
let mut value = sign * (integral_part + fractional_part);
798816

799-
if (
800-
tokenizer.has_at_least(1)
801-
&& matches!(tokenizer.next_byte_unchecked(), b'e' | b'E')
802-
&& matches!(tokenizer.byte_at(1), b'0'...b'9')
803-
) || (
804-
tokenizer.has_at_least(2)
805-
&& matches!(tokenizer.next_byte_unchecked(), b'e' | b'E')
806-
&& matches!(tokenizer.byte_at(1), b'+' | b'-')
807-
&& matches!(tokenizer.byte_at(2), b'0'...b'9')
808-
) {
809-
is_integer = false;
810-
tokenizer.advance(1);
811-
let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
812-
b'-' => (true, -1.),
813-
b'+' => (true, 1.),
814-
_ => (false, 1.),
815-
};
816-
if has_sign {
817-
tokenizer.advance(1);
818-
}
819-
let mut exponent: f64 = 0.;
820-
while let Some(digit) = tokenizer.next_char().to_digit(10) {
821-
exponent = exponent * 10. + digit as f64;
817+
if tokenizer.has_at_least(1)
818+
&& matches!(tokenizer.next_byte_unchecked(), b'e' | b'E') {
819+
820+
if matches!(tokenizer.byte_at(1), b'0'...b'9') ||
821+
(tokenizer.has_at_least(2)
822+
&& matches!(tokenizer.byte_at(1), b'+' | b'-')
823+
&& matches!(tokenizer.byte_at(2), b'0'...b'9'))
824+
{
825+
is_integer = false;
822826
tokenizer.advance(1);
823-
if tokenizer.is_eof() {
824-
break
827+
let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
828+
b'-' => (true, -1.),
829+
b'+' => (true, 1.),
830+
_ => (false, 1.),
831+
};
832+
if has_sign {
833+
tokenizer.advance(1);
834+
}
835+
let mut exponent: f64 = 0.;
836+
while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
837+
exponent = exponent * 10. + digit as f64;
838+
tokenizer.advance(1);
839+
if tokenizer.is_eof() {
840+
break
841+
}
825842
}
843+
value *= f64::powf(10., sign * exponent);
826844
}
827-
value *= f64::powf(10., sign * exponent);
828845
}
829846

830847
let int_value = if is_integer {
@@ -1007,7 +1024,7 @@ fn consume_hex_digits<'a>(tokenizer: &mut Tokenizer<'a>) -> (u32, u32) {
10071024
let mut value = 0;
10081025
let mut digits = 0;
10091026
while digits < 6 && !tokenizer.is_eof() {
1010-
match tokenizer.next_char().to_digit(16) {
1027+
match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
10111028
Some(digit) => {
10121029
value = value * 16 + digit;
10131030
digits += 1;

0 commit comments

Comments
 (0)