Skip to content

Commit 0c6fc68

Browse files
committed
use hyphens: manual
1 parent 058a430 commit 0c6fc68

File tree

7 files changed

+142
-14
lines changed

7 files changed

+142
-14
lines changed

Cargo.lock

Lines changed: 53 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ fs_extra = "1.1.0"
1616
regex = "1.3"
1717
sass-rs = "0.2.1"
1818
chrono = "0.4.13"
19+
kl-hyphenate = "0.7.2"

hyphenation-en-us.bincode

148 KB
Binary file not shown.

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
mod blogs;
2+
mod markdown;
23
mod posts;
34

45
use crate::blogs::Blog;

src/markdown.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
use comrak::{
2+
nodes::{AstNode, NodeValue},
3+
Arena, ComrakExtensionOptions, ComrakOptions, ComrakRenderOptions,
4+
};
5+
use kl_hyphenate::{Hyphenator, Language, Load, Standard};
6+
use std::error::Error;
7+
8+
const SOFT_HYPHEN: char = '\u{00AD}';
9+
const HYPHENATION_DICTIONARY: &str = "hyphenation-en-us.bincode";
10+
11+
pub(crate) fn render(input: &str) -> Result<String, Box<dyn Error>> {
12+
let options = ComrakOptions {
13+
render: ComrakRenderOptions {
14+
unsafe_: true, // Allow rendering of raw HTML
15+
..ComrakRenderOptions::default()
16+
},
17+
extension: ComrakExtensionOptions {
18+
header_ids: Some(String::new()),
19+
..ComrakExtensionOptions::default()
20+
},
21+
..ComrakOptions::default()
22+
};
23+
24+
let hyphenator = Standard::from_path(Language::EnglishUS, HYPHENATION_DICTIONARY)?;
25+
26+
let arena = Arena::new();
27+
let ast = comrak::parse_document(&arena, input, &options);
28+
29+
hyphenate(&ast, &hyphenator);
30+
31+
let mut output = Vec::new();
32+
comrak::format_html(&ast, &options, &mut output)?;
33+
Ok(String::from_utf8(output)?)
34+
}
35+
36+
// Pre-compute points inside words where browsers can add hyphens during rendering.
37+
//
38+
// Support for the CSS rule `hyphens: auto`, which tells the browser to split words by adding
39+
// hyphens when there is no space left on the line, is quite low across browsers, preventing us
40+
// from using it on the blog.
41+
//
42+
// A widely supported alternative is the `hyphens: manual` rule, which moves the burden of deciding
43+
// *where* to break the word to the website. To properly use that rule, the website has to insert
44+
// the "soft hyphen" unicode character (U+00AD) in every position the browser is allowed to break
45+
// the word.
46+
//
47+
// The following piece of code walks through the Markdown AST adding those characters in every
48+
// suitable place, thanks to the kl-hyphenate library.
49+
50+
fn hyphenate<'a>(node: &'a AstNode<'a>, hyphenator: &Standard) {
51+
match &mut node.data.borrow_mut().value {
52+
NodeValue::Text(content) => {
53+
if let Ok(string) = std::str::from_utf8(&content) {
54+
let hyphenated = add_soft_hyphens(string, hyphenator);
55+
*content = hyphenated.as_bytes().to_vec();
56+
}
57+
}
58+
_ => {}
59+
}
60+
for child in node.children() {
61+
hyphenate(child, hyphenator);
62+
}
63+
}
64+
65+
fn add_soft_hyphens(content: &str, hyphenator: &Standard) -> String {
66+
let mut output = String::with_capacity(content.len());
67+
for (i, word) in content.split(' ').enumerate() {
68+
if i != 0 {
69+
output.push(' ');
70+
}
71+
let hyphenated = hyphenator.hyphenate(word);
72+
for (j, segment) in hyphenated.into_iter().segments().enumerate() {
73+
if j != 0 {
74+
output.push(SOFT_HYPHEN);
75+
}
76+
output.push_str(&segment);
77+
}
78+
}
79+
output
80+
}

src/posts.rs

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use crate::blogs::Manifest;
2-
use comrak::{ComrakExtensionOptions, ComrakOptions, ComrakRenderOptions};
32
use regex::Regex;
43
use serde_derive::{Deserialize, Serialize};
54
use std::error::Error;
@@ -63,19 +62,7 @@ impl Post {
6362
layout,
6463
} = serde_yaml::from_str(yaml)?;
6564
// next, the contents. we add + to get rid of the final "---\n\n"
66-
let options = ComrakOptions {
67-
render: ComrakRenderOptions {
68-
unsafe_: true, // Allow rendering of raw HTML
69-
..ComrakRenderOptions::default()
70-
},
71-
extension: ComrakExtensionOptions {
72-
header_ids: Some(String::new()),
73-
..ComrakExtensionOptions::default()
74-
},
75-
..ComrakOptions::default()
76-
};
77-
78-
let contents = comrak::markdown_to_html(&contents[end_of_yaml + 5..], &options);
65+
let contents = crate::markdown::render(&contents[end_of_yaml + 5..])?;
7966

8067
// finally, the url.
8168
let mut url = PathBuf::from(&*filename);

src/styles/app.scss

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ blockquote {
5656

5757
p {
5858
text-align: justify;
59+
60+
/* Use manual hyphenation, as automatic hyphenation is not widely
61+
* supported (Chrome doesn't implement it on all platforms). */
62+
-webkit-hyphens: manual;
63+
-ms-hyphens: manual;
64+
hyphens: manual;
5965
}
6066

6167
code {

0 commit comments

Comments
 (0)