Skip to content

Commit af1303c

Browse files
authored
Merge pull request #519 from Pazzaz/char-standard
Avoid surrogates when generating `char` using Standard distribution
2 parents b7b1176 + 02a7a11 commit af1303c

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

src/distributions/other.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,21 @@ pub struct Alphanumeric;
4444
impl Distribution<char> for Standard {
4545
#[inline]
4646
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char {
47-
let range = Uniform::new(0u32, 0x11_0000);
48-
loop {
49-
match char::from_u32(range.sample(rng)) {
50-
Some(c) => return c,
51-
// About 0.2% of numbers in the range 0..0x110000 are invalid
52-
// codepoints (surrogates).
53-
None => {}
54-
}
47+
// A valid `char` is either in the interval `[0, 0xD800)` or
48+
// `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in
49+
// `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is
50+
// reserved for surrogates. This is the size of that gap.
51+
const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1;
52+
53+
// `Uniform::new(0, 0x11_0000 - GAP_SIZE)` could also be used (adding
54+
// `GAP_SIZE` to samples >= 0xD800 instead), but it seemed slower.
55+
let range = Uniform::new(GAP_SIZE, 0x11_0000);
56+
57+
let mut n = range.sample(rng);
58+
if n <= 0xDFFF {
59+
n -= GAP_SIZE;
5560
}
61+
unsafe { char::from_u32_unchecked(n) }
5662
}
5763
}
5864

0 commit comments

Comments
 (0)