-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Cover all plural forms in NumberParser #5134
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cover all plural forms in NumberParser #5134
Conversation
// This list is derived from https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison and includes
// all unique numbers which we need to check in order to determine all the plural forms for a given locale.
// TODO: add a link to the used script
const pluralNumbers = [ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I extracted this list of values from the table on https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison using the following script:
/**
 * Reads the numeric range described by every cell of a header row.
 *
 * Each cell's text is either a single value ("3") or a "start-end" pair
 * ("2-4"). A trailing ".x" is read as ".1", so "0.x" becomes 0.1.
 * A cell whose text starts with nothing before the first "-" (including an
 * empty cell) yields an empty array, which keeps later cells at the same index
 * as their column.
 *
 * @param {Element} row - Header row; must contain at least one child element.
 * @returns {Array<number[]>} One `[start, end]` pair (or `[]`) per cell, in order.
 */
function getRange(row) {
  // ".x" in the chart stands for an arbitrary fraction digit; sample it as ".1".
  const toNumber = (text) => Number(text.replace(/\.x$/, '.1'));

  const bounds = [];
  let cell = row.firstElementChild;
  do {
    const parts = cell.textContent.split('-');
    const low = parts[0];
    // A single value is a range that starts and ends on the same number.
    const high = parts[1] === undefined ? low : parts[1];

    bounds.push(low === '' ? [] : [toNumber(low), toNumber(high)]);

    cell = cell.nextElementSibling;
  } while (cell);

  return bounds;
}
/**
 * Collects the `title` of every SPAN that is a direct child element of a cell.
 *
 * `extractTable` passes the first returned title to `Intl.PluralRules`, so the
 * titles are expected to be locale identifiers.
 *
 * @param {Element} cell - Table cell listing the languages of a section.
 * @returns {string[]} Titles of the SPAN children in document order; an empty
 *   array when the cell has no element children.
 */
function getLocales(cell) {
  const locales = [];
  // Start from the first child and stop at null, so a cell without element
  // children produces [] instead of throwing on `null.tagName`.
  for (
    let element = cell.firstElementChild;
    element;
    element = element.nextElementSibling
  ) {
    if (element.tagName === 'SPAN') {
      locales.push(element.title);
    }
  }
  return locales;
}
/**
 * Maps each plural category to the header ranges covered by its cells.
 *
 * Every cell of the row names a category through its `title`. A cell with a
 * `colspan` attribute covers that many consecutive header columns; any other
 * cell covers exactly one. Columns are consumed from `range` starting at
 * index 1.
 *
 * @param {Element} tr - Row of category cells; must contain at least one cell,
 *   and every cell title must be one of `categories`.
 * @param {string[]} categories - Category names used as keys of the result.
 * @param {Array<number[]>} range - Per-column ranges of the section header.
 * @returns {Object<string, Array<number[]>>} Ranges collected per category.
 */
function getPlurals(tr, categories, range) {
  const rules = Object.fromEntries(categories.map((name) => [name, []]));

  let cell = tr.firstElementChild;
  let rangeIndex = 1;
  do {
    const bucket = rules[cell.title];

    let remaining = 1;
    if (cell.hasAttribute('colspan')) {
      remaining = Number(cell.getAttribute('colspan'));
    }

    // Always take at least one column, then one more per extra spanned column.
    do {
      bucket.push(range[rangeIndex]);
      rangeIndex += 1;
      remaining -= 1;
    } while (remaining > 0);

    cell = cell.nextElementSibling;
  } while (cell);

  return rules;
}
/**
 * Derives the list of sample numbers from the two plural comparison tables.
 *
 * Each table body is read row by row:
 *  - a row whose first child is a TH starts a new section and provides the
 *    column ranges (see `getRange`);
 *  - a row whose second child is a table cell with class `l` provides the
 *    section's languages (see `getLocales`);
 *  - any other row is read as the section's category row (see `getPlurals`),
 *    using the plural categories `Intl.PluralRules` reports for the section's
 *    first language.
 *
 * For every language and category that collected more than one range, the
 * start of the first range is added to the result. Values from the integer
 * table come first, then those from the fraction table; duplicates are dropped.
 * NOTE(review): categories with exactly one range contribute nothing — this is
 * kept as written; confirm it is intentional.
 *
 * @param {Element} integerTable - Comparison table for integer samples.
 * @param {Element} fractionTable - Comparison table for fractional samples.
 * @returns {number[]} Unique sample values in insertion order.
 */
function extractTable(integerTable, fractionTable) {
  // Appends one section's ranges to every language listed for that section.
  function mergeSection(results, section) {
    for (const language of section.languages) {
      if (!results[language]) {
        results[language] = Object.fromEntries(
          Object.keys(section.rules).map((key) => [key, []])
        );
      }
      for (const rule in section.rules) {
        results[language][rule] = [].concat(
          results[language][rule],
          section.rules[rule]
        );
      }
    }
  }

  // Builds { language: { category: ranges[] } } for a single table.
  function extract(table) {
    const tbody = table.querySelector('tbody');
    const results = {};
    let current;

    for (let tr = tbody.firstElementChild; tr; tr = tr.nextElementSibling) {
      if (tr.firstElementChild.tagName === 'TH') {
        // A new header closes the previous section.
        if (current) {
          mergeSection(results, current);
        }
        current = {
          range: getRange(tr),
          languages: [],
          rules: {},
        };
      } else if (
        tr.children[1] instanceof HTMLTableCellElement &&
        tr.children[1].classList.contains('l')
      ) {
        current.languages = getLocales(tr.children[1]);
      } else {
        const [lang] = current.languages;
        const { pluralCategories } = new Intl.PluralRules(
          lang
        ).resolvedOptions();
        current.rules = getPlurals(tr, pluralCategories, current.range);
      }
    }

    // No header follows the last section, so it has to be merged here;
    // otherwise the languages of the final section would be lost.
    if (current) {
      mergeSection(results, current);
    }
    return results;
  }

  // Adds the first range start of every category that has more than one range.
  function collectFirstValues(values, byLanguage) {
    for (const language in byLanguage) {
      for (const rule in byLanguage[language]) {
        const ranges = byLanguage[language][rule];
        if (ranges.length > 1) {
          values.add(ranges[0][0]);
        }
      }
    }
  }

  const integer = extract(integerTable);
  const fraction = extract(fractionTable);

  const values = new Set();
  collectFirstValues(values, integer);
  collectFirstValues(values, fraction);
  return Array.from(values);
}
// The first two `.pluralComp` elements on the page are passed on as the
// integer and the fraction comparison table, in that order.
const [integerTable, fractionTable] = Array.from(
  document.querySelectorAll('.pluralComp')
);
let values = extractTable(integerTable, fractionTable);
The code itself is not written optimally, but it works: it produces a list of values with which we get all plural forms.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is wonderful, thank you for sharing this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the script to our repo
#5141
// This list is derived from https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison and includes
// all unique numbers which we need to check in order to determine all the plural forms for a given locale.
// TODO: add a link to the used script
const pluralNumbers = [ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is wonderful, thank you for sharing this.
Closes #5104
✅ Pull Request Checklist:
📝 Test Instructions:
This feature can be tested against the different forms of pluralisation listed in the https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison table. To do so, update the formatOptions on a NumberField component to
{ style: 'unit', unit: 'year' }
for example.