Skip to content

Commit ad98a45

Browse files
author
bors-servo
authored
Auto merge of #159 - servo:size_of, r=emilio,mbrubeck
Reduce the size of Token Hopefully improving parsing performance: https://bugzilla.mozilla.org/show_bug.cgi?id=1347408 <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/rust-cssparser/159) <!-- Reviewable:end -->
2 parents b204732 + 60b953e commit ad98a45

11 files changed

+459
-191
lines changed

src/color.rs

+11-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ use std::fmt;
66
use std::f32::consts::PI;
77

88
use super::{Token, Parser, ToCss, ParseError, BasicParseError};
9-
use tokenizer::NumericValue;
109

1110
#[cfg(feature = "serde")]
1211
use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -430,11 +429,11 @@ fn parse_color_function<'i, 't>(name: &str, arguments: &mut Parser<'i, 't>) -> R
430429
};
431430
let token = try!(arguments.next());
432431
match token {
433-
Token::Number(NumericValue { value: v, .. }) => {
432+
Token::Number { value: v, .. } => {
434433
clamp_unit_f32(v)
435434
}
436-
Token::Percentage(ref v) => {
437-
clamp_unit_f32(v.unit_value)
435+
Token::Percentage { unit_value: v, .. } => {
436+
clamp_unit_f32(v)
438437
}
439438
t => {
440439
return Err(BasicParseError::UnexpectedToken(t))
@@ -459,10 +458,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
459458
// Either integers or percentages, but all the same type.
460459
// https://drafts.csswg.org/css-color/#rgb-functions
461460
match try!(arguments.next()) {
462-
Token::Number(NumericValue { value: v, .. }) => {
461+
Token::Number { value: v, .. } => {
463462
red = clamp_floor_256_f32(v);
464463
green = clamp_floor_256_f32(match try!(arguments.next()) {
465-
Token::Number(NumericValue { value: v, .. }) => v,
464+
Token::Number { value: v, .. } => v,
466465
Token::Comma => {
467466
uses_commas = true;
468467
try!(arguments.expect_number())
@@ -474,10 +473,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
474473
}
475474
blue = clamp_floor_256_f32(try!(arguments.expect_number()));
476475
}
477-
Token::Percentage(ref v) => {
478-
red = clamp_unit_f32(v.unit_value);
476+
Token::Percentage { unit_value, .. } => {
477+
red = clamp_unit_f32(unit_value);
479478
green = clamp_unit_f32(match try!(arguments.next()) {
480-
Token::Percentage(ref v) => v.unit_value,
479+
Token::Percentage { unit_value, .. } => unit_value,
481480
Token::Comma => {
482481
uses_commas = true;
483482
try!(arguments.expect_percentage())
@@ -501,8 +500,8 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
501500
// https://drafts.csswg.org/css-values/#angles
502501
let token = try!(arguments.next());
503502
let hue_degrees = match token {
504-
Token::Number(NumericValue { value: v, .. }) => Ok(v),
505-
Token::Dimension(NumericValue { value: v, .. }, ref unit) => {
503+
Token::Number { value: v, .. } => Ok(v),
504+
Token::Dimension { value: v, ref unit, .. } => {
506505
match_ignore_ascii_case! { &*unit,
507506
"deg" => Ok(v),
508507
"grad" => Ok(v * 360. / 400.),
@@ -521,7 +520,7 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u
521520
// Saturation and lightness are clamped to 0% ... 100%
522521
// https://drafts.csswg.org/css-color/#the-hsl-notation
523522
let saturation = match try!(arguments.next()) {
524-
Token::Percentage(ref v) => v.unit_value,
523+
Token::Percentage { unit_value, .. } => unit_value,
525524
Token::Comma => {
526525
uses_commas = true;
527526
try!(arguments.expect_percentage())

src/compact_cow_str.rs

+244
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
use std::borrow::{Borrow, Cow};
6+
use std::cmp;
7+
use std::fmt;
8+
use std::hash;
9+
use std::marker::PhantomData;
10+
use std::mem;
11+
use std::ops::Deref;
12+
use std::slice;
13+
use std::str;
14+
15+
// All bits set except the highest
const MAX_LEN: usize = !0 >> 1;

// Only the highest bit
const OWNED_TAG: usize = MAX_LEN + 1;

/// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. (Two words instead of four.)
pub struct CompactCowStr<'a> {
    // `tagged_len` is a tag in its highest bit, and the string length in the rest of the bits.
    //
    // * If the tag is 1, the memory pointed to by `ptr` is owned
    //   and the lifetime parameter is irrelevant.
    //   `ptr` and `len` are the components of a `Box<str>`.
    //
    // * If the tag is 0, the memory is borrowed.
    //   `ptr` and `len` are the components of a `&'a str`.

    // FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared
    ptr: *const u8,
    tagged_len: usize,
    phantom: PhantomData<&'a str>,
}

/// Wraps a borrowed string slice without allocating.
///
/// # Panics
///
/// Panics if the length of `s` does not fit in the bits left over by the tag.
impl<'a> From<&'a str> for CompactCowStr<'a> {
    #[inline]
    fn from(s: &'a str) -> Self {
        let len = s.len();
        // The highest bit of `tagged_len` is reserved for the tag.
        assert!(len <= MAX_LEN);
        CompactCowStr {
            ptr: s.as_ptr(),
            tagged_len: len,
            phantom: PhantomData,
        }
    }
}

/// Takes ownership of a boxed string; its allocation is released by `Drop`.
///
/// # Panics
///
/// Panics if the length of `s` does not fit in the bits left over by the tag.
impl<'a> From<Box<str>> for CompactCowStr<'a> {
    #[inline]
    fn from(s: Box<str>) -> Self {
        let len = s.len();
        // Checked before ownership is given up so that a panic cannot leak the box.
        assert!(len <= MAX_LEN);
        // `into_raw` hands the allocation over to us; only the data pointer is kept,
        // the length lives in `tagged_len`.
        let ptr = Box::into_raw(s) as *const u8;
        CompactCowStr {
            ptr: ptr,
            tagged_len: len | OWNED_TAG,
            phantom: PhantomData,
        }
    }
}

impl<'a> CompactCowStr<'a> {
    /// Whether this string refers to borrowed memory
    /// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope).
    #[inline]
    pub fn is_borrowed(&self) -> bool {
        (self.tagged_len & OWNED_TAG) == 0
    }

    /// The length of this string
    #[inline]
    pub fn len(&self) -> usize {
        self.tagged_len & !OWNED_TAG
    }

    // Intentionally private since it is easy to use incorrectly.
    //
    // The returned pointer carries no lifetime: the caller decides whether it is
    // dereferenced as `&'a str` (borrowed), as a reborrow of `self`, or turned back
    // into the owning `Box<str>` (which must happen at most once).
    #[inline]
    fn as_raw_str(&self) -> *const str {
        // SAFETY: both constructors store the pointer and length of a valid `str`,
        // so the bytes are initialized, in bounds and valid UTF-8.
        unsafe {
            str::from_utf8_unchecked(slice::from_raw_parts(self.ptr, self.len()))
        }
    }

    /// If this string is borrowed, return a slice with the original lifetime,
    /// not borrowing `self`.
    ///
    /// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.)
    #[inline]
    pub fn as_str(&self) -> Option<&'a str> {
        if self.is_borrowed() {
            // SAFETY: the tag says the data came from a `&'a str`, so it lives for `'a`.
            Some(unsafe { &*self.as_raw_str() })
        } else {
            None
        }
    }

    /// Convert into `String`, re-using the memory allocation if it was already owned.
    #[inline]
    pub fn into_owned(self) -> String {
        let raw = self.as_raw_str();
        let is_borrowed = self.is_borrowed();
        // From here on this function is responsible for the allocation (if any);
        // letting `Drop` run as well would free it a second time.
        mem::forget(self);
        // SAFETY: `raw` points to a valid `str`. In the owned case it originated from a
        // `Box<str>` whose ownership is reclaimed exactly once, here.
        unsafe {
            if is_borrowed {
                String::from(&*raw)
            } else {
                Box::from_raw(raw as *mut str).into_string()
            }
        }
    }
}

/// Cloning a borrowed string copies the pointer; cloning an owned string allocates a copy.
impl<'a> Clone for CompactCowStr<'a> {
    #[inline]
    fn clone(&self) -> Self {
        if self.is_borrowed() {
            CompactCowStr {
                ptr: self.ptr,
                tagged_len: self.tagged_len,
                phantom: PhantomData,
            }
        } else {
            // An owned string must not share its allocation with the clone.
            Self::from(String::from(&**self).into_boxed_str())
        }
    }
}

impl<'a> Drop for CompactCowStr<'a> {
    #[inline]
    fn drop(&mut self) {
        if !self.is_borrowed() {
            // SAFETY: the owned tag means `ptr`/`len` came from a `Box<str>` that has not
            // been reclaimed yet (every consuming path calls `mem::forget` first).
            unsafe {
                mem::drop(Box::from_raw(self.as_raw_str() as *mut str));
            }
        }
    }
}

impl<'a> Deref for CompactCowStr<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &str {
        // SAFETY: the data is valid for at least as long as `self` is borrowed,
        // whether it is owned by `self` or borrowed for `'a`.
        unsafe {
            &*self.as_raw_str()
        }
    }
}

/// Converts to a standard `Cow`, keeping borrowed data borrowed and re-using the
/// allocation of owned data.
impl<'a> From<CompactCowStr<'a>> for Cow<'a, str> {
    #[inline]
    fn from(cow: CompactCowStr<'a>) -> Self {
        // Built from the safe accessors on purpose: `into_owned` forgets `cow` before
        // reclaiming the box, so the allocation is never freed twice by `Drop`.
        match cow.as_str() {
            Some(borrowed) => Cow::Borrowed(borrowed),
            None => Cow::Owned(cow.into_owned()),
        }
    }
}

impl<'a> From<String> for CompactCowStr<'a> {
    #[inline]
    fn from(s: String) -> Self {
        Self::from(s.into_boxed_str())
    }
}

impl<'a> From<Cow<'a, str>> for CompactCowStr<'a> {
    #[inline]
    fn from(s: Cow<'a, str>) -> Self {
        match s {
            Cow::Borrowed(s) => Self::from(s),
            Cow::Owned(s) => Self::from(s),
        }
    }
}

impl<'a> AsRef<str> for CompactCowStr<'a> {
    #[inline]
    fn as_ref(&self) -> &str {
        self
    }
}

impl<'a> Borrow<str> for CompactCowStr<'a> {
    #[inline]
    fn borrow(&self) -> &str {
        self
    }
}

/// The default value is the borrowed empty string.
impl<'a> Default for CompactCowStr<'a> {
    #[inline]
    fn default() -> Self {
        Self::from("")
    }
}

// The trait impls below all delegate to the corresponding `str` impl, so a
// `CompactCowStr` hashes, compares and formats exactly like the string it holds.

impl<'a> hash::Hash for CompactCowStr<'a> {
    #[inline]
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        str::hash(self, hasher)
    }
}

impl<'a, T: AsRef<str>> PartialEq<T> for CompactCowStr<'a> {
    #[inline]
    fn eq(&self, other: &T) -> bool {
        str::eq(self, other.as_ref())
    }
}

impl<'a, T: AsRef<str>> PartialOrd<T> for CompactCowStr<'a> {
    #[inline]
    fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> {
        str::partial_cmp(self, other.as_ref())
    }
}

impl<'a> Eq for CompactCowStr<'a> {}

impl<'a> Ord for CompactCowStr<'a> {
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        str::cmp(self, other)
    }
}

impl<'a> fmt::Display for CompactCowStr<'a> {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        str::fmt(self, formatter)
    }
}

impl<'a> fmt::Debug for CompactCowStr<'a> {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        str::fmt(self, formatter)
    }
}

src/lib.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser)
8080

8181
pub use cssparser_macros::*;
8282

83-
pub use tokenizer::{Token, NumericValue, PercentageValue, SourceLocation};
83+
pub use tokenizer::{Token, SourceLocation};
8484
pub use rules_and_declarations::{parse_important};
8585
pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration};
8686
pub use rules_and_declarations::{RuleListParser, parse_one_rule};
@@ -91,6 +91,7 @@ pub use nth::parse_nth;
9191
pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType};
9292
pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput};
9393
pub use unicode_range::UnicodeRange;
94+
pub use compact_cow_str::CompactCowStr;
9495

9596
// For macros
9697
#[doc(hidden)] pub use macros::_internal__to_lowercase;
@@ -116,6 +117,7 @@ mod color;
116117
mod nth;
117118
mod serializer;
118119
mod unicode_range;
120+
mod compact_cow_str;
119121

120-
#[cfg(test)]
121-
mod tests;
122+
#[cfg(test)] mod tests;
123+
#[cfg(test)] mod size_of_tests;

0 commit comments

Comments
 (0)