Skip to content

chinese search support #1496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ tokio = { version = "0.2.18", features = ["macros"], optional = true }
warp = { version = "0.2.2", default-features = false, features = ["websocket"], optional = true }

# Search feature
elasticlunr-rs = { version = "2.3", optional = true, default-features = false }
elasticlunr-rs = { version = "2.3", optional = true }
ammonia = { version = "3", optional = true }

[dev-dependencies]
Expand Down
7 changes: 6 additions & 1 deletion src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,12 @@ impl Renderer for HtmlHandlebars {
{
let search = html_config.search.unwrap_or_default();
if search.enable {
super::search::create_files(&search, &destination, &book)?;
super::search::create_files(
&search,
&ctx.config.book.language,
&destination,
&book,
)?;
}
}

Expand Down
20 changes: 18 additions & 2 deletions src/renderer/html_handlebars/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,23 @@ use crate::theme::searcher;
use crate::utils;

/// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
let mut index = Index::new(&["title", "body", "breadcrumbs"]);
pub fn create_files(
search_config: &Search,
lang: &Option<String>,
destination: &Path,
book: &Book,
) -> Result<()> {
let mut index = match lang {
Some(lang_str) => match lang_str.to_lowercase().as_str() {
"zh" => Index::with_language(
elasticlunr::Language::Chinese,
&["title", "body", "breadcrumbs"],
),
_ => Index::new(&["title", "body", "breadcrumbs"]),
},
None => Index::new(&["title", "body", "breadcrumbs"]),
};

let mut doc_urls = Vec::with_capacity(book.sections.len());

for item in book.iter() {
Expand All @@ -36,6 +51,7 @@ pub fn create_files(search_config: &Search, destination: &Path, book: &Book) ->
utils::fs::write_file(destination, "searcher.js", searcher::JS)?;
utils::fs::write_file(destination, "mark.min.js", searcher::MARK_JS)?;
utils::fs::write_file(destination, "elasticlunr.min.js", searcher::ELASTICLUNR_JS)?;
utils::fs::write_file(destination, "lunr.zh.js", searcher::LUNR_ZH_JS)?;
debug!("Copying search files ✓");
}

Expand Down
1 change: 1 addition & 0 deletions src/theme/index.hbs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@

{{#if search_js}}
<script src="{{ path_to_root }}elasticlunr.min.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}lunr.zh.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}mark.min.js" type="text/javascript" charset="utf-8"></script>
<script src="{{ path_to_root }}searcher.js" type="text/javascript" charset="utf-8"></script>
{{/if}}
Expand Down
89 changes: 89 additions & 0 deletions src/theme/searcher/lunr.zh.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
 * Chinese ("zh") language plugin for a lunr-compatible search library.
 *
 * The outer wrapper picks how to expose the plugin depending on the
 * environment (AMD loader, CommonJS-like module system, or plain browser
 * globals). The factory returns a function that takes the host `lunr`
 * object and attaches `lunr.zh` plus its tokenizer and pipeline functions.
 */
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define(factory)
} else if (typeof exports === 'object') {
/**
* Node. Does not work with strict CommonJS, but
* only CommonJS-like environments that support module.exports,
* like Node.
*/
module.exports = factory()
} else {
// Browser globals (root is window).
// Unlike the two branches above, the plugin is applied immediately here,
// so a global `lunr` must already exist when this script is evaluated.
factory()(root.lunr);
}
}(this, function () {
// Returns the installer: call it with the host library object to register
// the Chinese language support on it.
return function (lunr) {
// Fail loudly when the host library was not loaded before this plugin.
if ('undefined' === typeof lunr) {
throw new Error('Lunr is not present. Please include / require Lunr before this script.');
}

/*
 * Locale function. It is written to run with `this` bound to an index
 * under construction: it clears the indexing pipeline and installs the
 * zh trimmer, stop-word filter and stemmer, in that order.
 * Note that it does not install `lunr.zh.tokenizer`; code that wants the
 * zh tokenizer has to assign it separately.
 */
lunr.zh = function () {
this.pipeline.reset();
this.pipeline.add(
lunr.zh.trimmer,
lunr.zh.stopWordFilter,
lunr.zh.stemmer
);

// lunr 2.x keeps a separate search pipeline, so the stemmer has to be
// added there as well for query terms to be stemmed.
// lunr <= 1 runs queries through the normal pipeline, so nothing extra
// is needed; the check below simply skips hosts without `searchPipeline`.
if (this.searchPipeline) {
this.searchPipeline.reset();
this.searchPipeline.add(lunr.zh.stemmer)
}
};

/*
 * Tokenizer for zh content.
 *
 * - No argument, `null` or `undefined`: returns an empty array.
 * - Array input: drops `null`/`undefined` entries, converts the rest to
 *   strings with `lunr.utils.toString`, splits each one on
 *   `lunr.tokenizer.seperator` and returns the concatenated tokens.
 * - Anything else: converted with `toString()`, trimmed and split on the
 *   same separator.
 *
 * No case folding is applied anywhere in this function.
 * NOTE(review): `seperator` (sic) is presumably the property name exposed by
 * the host library; confirm before "correcting" the spelling.
 */
lunr.zh.tokenizer = function (str) {
if (!arguments.length || str === null || str === undefined) return [];
if (Array.isArray(str)) {
// Discard entries that cannot be turned into meaningful tokens.
var arr = str.filter(function (token) {
if (token === null || token === undefined) {
return false;
}

return true;
});

// Normalise every remaining entry to a string before splitting.
arr = arr.map(function (t) {
return lunr.utils.toString(t);
});

// Split each entry and flatten the pieces into a single token list.
var out = [];
arr.forEach(function (item) {
var tokens = item.split(lunr.tokenizer.seperator);
out = out.concat(tokens);
}, this);

return out;
}

return str.toString().trim().split(lunr.tokenizer.seperator);
};


/* lunr trimmer function: identity, returns the token unchanged */
lunr.zh.trimmer = function (_token) {
return _token;
}

// Register under a label so the host's Pipeline can refer to the function by name.
lunr.Pipeline.registerFunction(lunr.zh.trimmer, 'trimmer-zh');

/* lunr stemmer function: identity, returns the token unchanged */
lunr.zh.stemmer = (function () {
/* and return a function that stems a word for the current locale */
return function (token) {
return token;
}
})();
lunr.Pipeline.registerFunction(lunr.zh.stemmer, 'stemmer-zh');

/* lunr stop-word filter: identity, no token is ever filtered out */
lunr.zh.stopWordFilter = function (token) {
return token;
};
lunr.Pipeline.registerFunction(lunr.zh.stopWordFilter, 'stopWordFilter-zh');
};
}))
1 change: 1 addition & 0 deletions src/theme/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
/// Contents of `searcher.js`, embedded into the binary at compile time.
pub static JS: &[u8] = include_bytes!("searcher.js");
/// Contents of `mark.min.js`, embedded into the binary at compile time.
pub static MARK_JS: &[u8] = include_bytes!("mark.min.js");
/// Contents of `elasticlunr.min.js`, embedded into the binary at compile time.
pub static ELASTICLUNR_JS: &[u8] = include_bytes!("elasticlunr.min.js");
/// Contents of `lunr.zh.js`, embedded into the binary at compile time.
pub static LUNR_ZH_JS: &[u8] = include_bytes!("lunr.zh.js");
33 changes: 18 additions & 15 deletions src/theme/searcher/searcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ window.search = window.search || {};
path: a.pathname.replace(/^([^/])/,'/$1')
};
}

// Helper to recreate a url string from its building blocks.
function renderURL(urlobject) {
var url = urlobject.protocol + "://" + urlobject.host;
Expand Down Expand Up @@ -124,7 +124,7 @@ window.search = window.search || {};
return s.replace(/[&<>'"]/g, repl);
};
})();

function formatSearchMetric(count, searchterm) {
if (count == 1) {
return count + " search result for '" + searchterm + "':";
Expand All @@ -134,7 +134,7 @@ window.search = window.search || {};
return count + " search results for '" + searchterm + "':";
}
}

function formatSearchResult(result, searchterms) {
var teaser = makeTeaser(escapeHTML(result.doc.body), searchterms);
teaser_count++;
Expand All @@ -152,10 +152,10 @@ window.search = window.search || {};

return '<a href="' + path_to_root + url[0] + '?' + URL_MARK_PARAM + '=' + searchterms + '#' + url[1]
+ '" aria-details="teaser_' + teaser_count + '">' + result.doc.breadcrumbs + '</a>'
+ '<span class="teaser" id="teaser_' + teaser_count + '" aria-label="Search Result Teaser">'
+ '<span class="teaser" id="teaser_' + teaser_count + '" aria-label="Search Result Teaser">'
+ teaser + '</span>';
}

function makeTeaser(body, searchterms) {
// The strategy is as follows:
// First, assign a value to each word in the document:
Expand Down Expand Up @@ -257,6 +257,9 @@ window.search = window.search || {};
search_options = config.search_options;
searchbar_outer = config.searchbar_outer;
doc_urls = config.doc_urls;
if (config.index.lang == "Chinese") {
elasticlunr.tokenizer = elasticlunr.zh.tokenizer
}
searchindex = elasticlunr.Index.load(config.index);

// Set up events
Expand All @@ -271,7 +274,7 @@ window.search = window.search || {};
// If reloaded, do the search or mark again, depending on the current url parameters
doSearchOrMarkFromUrl();
}

function unfocusSearchbar() {
// hacky, but just focusing a div only works once
var tmp = document.createElement('input');
Expand All @@ -280,7 +283,7 @@ window.search = window.search || {};
tmp.focus();
tmp.remove();
}

// On reload or browser history backwards/forwards events, parse the url and do search or mark
function doSearchOrMarkFromUrl() {
// Check current URL for search request
Expand Down Expand Up @@ -313,7 +316,7 @@ window.search = window.search || {};
}
}
}

// Eventhandler for keyevents on `document`
function globalKeyHandler(e) {
if (e.altKey || e.ctrlKey || e.metaKey || e.shiftKey || e.target.type === 'textarea' || e.target.type === 'text') { return; }
Expand All @@ -338,8 +341,8 @@ window.search = window.search || {};
unfocusSearchbar();
searchresults.firstElementChild.classList.add("focus");
} else if (!hasFocus() && (e.keyCode === DOWN_KEYCODE
|| e.keyCode === UP_KEYCODE
|| e.keyCode === SELECT_KEYCODE)) {
|| e.keyCode === UP_KEYCODE
|| e.keyCode === SELECT_KEYCODE)) {
// not `:focus` because browser does annoying scrolling
var focused = searchresults.querySelector("li.focus");
if (!focused) return;
Expand All @@ -363,7 +366,7 @@ window.search = window.search || {};
}
}
}

function showSearch(yes) {
if (yes) {
search_wrap.classList.remove('hidden');
Expand Down Expand Up @@ -396,7 +399,7 @@ window.search = window.search || {};
showSearch(false);
}
}

// Eventhandler for keyevents while the searchbar is focused
function searchbarKeyUpHandler() {
var searchterm = searchbar.value.trim();
Expand All @@ -414,7 +417,7 @@ window.search = window.search || {};
// Remove marks
marker.unmark();
}

// Update current url with ?URL_SEARCH_PARAM= parameter, remove ?URL_MARK_PARAM and #heading-anchor .
// `action` can be one of "push", "replace", "push_if_new_search_else_replace"
// and replaces or pushes a new browser history item.
Expand All @@ -439,7 +442,7 @@ window.search = window.search || {};
history.replaceState({}, document.title, renderURL(url));
}
}

function doSearch(searchterm) {

// Don't search the same twice
Expand Down Expand Up @@ -470,7 +473,7 @@ window.search = window.search || {};

fetch(path_to_root + 'searchindex.json')
.then(response => response.json())
.then(json => init(json))
.then(json => init(json))
.catch(error => { // Try to load searchindex.js if fetch failed
var script = document.createElement('script');
script.src = path_to_root + 'searchindex.js';
Expand Down