diff --git a/README.md b/README.md index 983305d..a69d47b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ovos-date-parser `ovos-date-parser` is a comprehensive library for multilingual date and time parsing, extraction, and formatting, -designed to handle a range of human-readable date, time, and duration expressions. +designed to handle a range of human-readable date, time, and duration expressions. ## Features @@ -74,29 +74,61 @@ print(relative_time) # "tomorrow" ### Languages Supported `ovos-date-parser` supports a wide array of languages, each with its own set of methods for handling natural language -time expressions. Available methods include `extract_datetime`, `extract_duration`, `nice_time`, and `nice_duration` for -the following languages: - -| Language | `nice_time` | `nice_relative_time` | `nice_duration` | `extract_duration` | `extract_datetime` | -|----------|-------------|----------------------|-----------------|--------------------|--------------------| -| az | ✅ | ❌ | ✅ | ✅ | ✅ | -| ca | ✅ | ❌ | ❌ | ❌ | ✅ | -| cs | ✅ | ❌ | ❌ | ✅ | ✅ | -| da | ✅ | ❌ | ❌ | ❌ | ✅ | -| de | ✅ | ❌ | ❌ | ✅ | ✅ | -| en | ✅ | ❌ | ❌ | ✅ | ✅ | -| es | ✅ | ❌ | ❌ | ✅ | ✅ | -| eu | ✅ | ✅ | ❌ | ❌ | ✅ | -| fa | ✅ | ❌ | ❌ | ✅ | ✅ | -| fr | ✅ | ❌ | ❌ | ❌ | ✅ | -| hu | ✅ | ❌ | ❌ | ❌ | ❌ | -| it | ✅ | ❌ | ❌ | ❌ | ✅ | -| nl | ✅ | ❌ | ❌ | ✅ | ✅ | -| pl | ✅ | ❌ | ✅ | ✅ | ✅ | -| pt | ✅ | ❌ | ❌ | ✅ | ✅ | -| ru | ✅ | ❌ | ✅ | ✅ | ✅ | -| sv | ✅ | ❌ | ❌ | ✅ | ✅ | -| uk | ✅ | ❌ | ✅ | ✅ | ✅ | +time expressions. + +Parse + +| Language | `extract_duration` | `extract_datetime` | +|----------|--------------------|--------------------| +| az | ✅ | ✅ | +| ca | ❌ | ✅ | +| cs | ✅ | ✅ | +| da | ❌ | ✅ | +| de | ✅ | ✅ | +| en | ✅ | ✅ | +| es | ✅ | ✅ | +| eu | ❌ | ✅ | +| fa | ✅ | ✅ | +| fr | ❌ | ✅ | +| hu | ❌ | ❌ | +| it | ❌ | ✅ | +| nl | ✅ | ✅ | +| pl | ✅ | ✅ | +| pt | ✅ | ✅ | +| ru | ✅ | ✅ | +| sv | ✅ | ✅ | +| uk | ✅ | ✅ | + +Format + +| Language | `nice_date`
`nice_date_time`
`nice_day`
`nice_weekday`
`nice_month`
`nice_year`
`get_date_strings` | `nice_time` | `nice_relative_time` | `nice_duration` | +|----------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------------------|-----------------| +| az | ✅ | ✅ | ❌ | ✅ | +| ca | ✅ | ✅ | ❌ | ❌ | +| cs | ✅ | ✅ | ❌ | ❌ | +| da | ✅ | ✅ | ❌ | ❌ | +| de | ✅ | ✅ | ❌ | ❌ | +| en | ✅ | ✅ | ❌ | ❌ | +| es | ❌ | ✅ | ❌ | ❌ | +| eu | ✅ | ✅ | ✅ | ❌ | +| fa | ✅ | ✅ | ❌ | ❌ | +| fr | ✅ | ✅ | ❌ | ❌ | +| hu | ✅ | ✅ | ❌ | ❌ | +| it | ✅ | ✅ | ❌ | ❌ | +| nl | ✅ | ✅ | ❌ | ❌ | +| pl | ✅ | ✅ | ❌ | ✅ | +| pt | ❌ | ✅ | ❌ | ❌ | +| ru | ✅ | ✅ | ❌ | ✅ | +| sv | ✅ | ✅ | ❌ | ❌ | +| sl | ✅ | ❌ | ❌ | ❌ | +| uk | ✅ | ✅ | ❌ | ✅ | + +## Related Projects + +- [ovos-number-parser](https://github.com/OpenVoiceOS/ovos-number-parser) - for handling numbers +- [ovos-lang-parser](https://github.com/OVOSHatchery/ovos-lang-parser) - for handling languages +- [ovos-color-parser](https://github.com/OVOSHatchery/ovos-color-parser) - for handling colors + ## License diff --git a/ovos_date_parser/__init__.py b/ovos_date_parser/__init__.py index 7b99d81..65af7e0 100644 --- a/ovos_date_parser/__init__.py +++ b/ovos_date_parser/__init__.py @@ -1,6 +1,12 @@ +import json +import os +import re +from collections import namedtuple from datetime import datetime, timedelta, time from typing import Optional, Tuple, Union +from ovos_utils.lang import standardize_lang_tag + from ovos_date_parser.dates_az import ( extract_datetime_az, extract_duration_az, @@ -87,7 +93,7 @@ extract_datetime_uk, extract_duration_uk, nice_time_uk, -nice_duration_uk + nice_duration_uk ) @@ -286,3 +292,311 @@ def extract_datetime( if lang.startswith("uk"): return extract_datetime_uk(text, anchorDate=anchorDate, default_time=default_time) raise NotImplementedError(f"Unsupported language: {lang}") + + +NUMBER_TUPLE = namedtuple( + 'number', + ('x, xx, x0, x_in_x0, xxx, x00, x_in_x00, xx00, xx_in_xx00, x000, ' + + 'x_in_x000, 
x0_in_x000, x_in_0x00')) + + +class DateTimeFormat: + def __init__(self, config_path): + self.lang_config = {} + self.config_path = config_path + + def cache(self, lang): + # TODO - find closest lang code + if lang not in self.lang_config: + try: + # Attempt to load the language-specific formatting data + with open(self.config_path + '/' + lang + '/date_time.json', + 'r', encoding='utf8') as lang_config_file: + self.lang_config[lang] = json.loads( + lang_config_file.read()) + except FileNotFoundError: + # Fallback to English formatting + with open(self.config_path + '/en-us/date_time.json', + 'r') as lang_config_file: + self.lang_config[lang] = json.loads( + lang_config_file.read()) + + for x in ['decade_format', 'hundreds_format', 'thousand_format', + 'year_format']: + i = 1 + while self.lang_config[lang][x].get(str(i)): + self.lang_config[lang][x][str(i)]['re'] = ( + re.compile(self.lang_config[lang][x][str(i)]['match'] + )) + i = i + 1 + + def _number_strings(self, number, lang): + x = (self.lang_config[lang]['number'].get(str(number % 10)) or + str(number % 10)) + xx = (self.lang_config[lang]['number'].get(str(number % 100)) or + str(number % 100)) + x_in_x0 = self.lang_config[lang]['number'].get( + str(int(number % 100 / 10))) or str(int(number % 100 / 10)) + x0 = (self.lang_config[lang]['number'].get( + str(int(number % 100 / 10) * 10)) or + str(int(number % 100 / 10) * 10)) + xxx = (self.lang_config[lang]['number'].get(str(number % 1000)) or + str(number % 1000)) + x00 = (self.lang_config[lang]['number'].get(str(int( + number % 1000 / 100) * 100)) or + str(int(number % 1000 / 100) * 100)) + x_in_x00 = self.lang_config[lang]['number'].get(str(int( + number % 1000 / 100))) or str(int(number % 1000 / 100)) + xx00 = self.lang_config[lang]['number'].get(str(int( + number % 10000 / 100) * 100)) or str(int(number % 10000 / 100) * + 100) + xx_in_xx00 = self.lang_config[lang]['number'].get(str(int( + number % 10000 / 100))) or str(int(number % 10000 / 100)) + x000 = 
(self.lang_config[lang]['number'].get(str(int( + number % 10000 / 1000) * 1000)) or + str(int(number % 10000 / 1000) * 1000)) + x_in_x000 = self.lang_config[lang]['number'].get(str(int( + number % 10000 / 1000))) or str(int(number % 10000 / 1000)) + x0_in_x000 = self.lang_config[lang]['number'].get(str(int( + number % 10000 / 1000) * 10)) or str(int(number % 10000 / 1000) * 10) + x_in_0x00 = self.lang_config[lang]['number'].get(str(int( + number % 1000 / 100))) or str(int(number % 1000 / 100)) + + return NUMBER_TUPLE( + x, xx, x0, x_in_x0, xxx, x00, x_in_x00, xx00, xx_in_xx00, x000, + x_in_x000, x0_in_x000, x_in_0x00) + + def _format_string(self, number, format_section, lang): + s = self.lang_config[lang][format_section]['default'] + i = 1 + while self.lang_config[lang][format_section].get(str(i)): + e = self.lang_config[lang][format_section][str(i)] + if e['re'].match(str(number)): + return e['format'] + i = i + 1 + return s + + def _decade_format(self, number, number_tuple, lang): + s = self._format_string(number % 100, 'decade_format', lang) + decade = s.format(x=number_tuple.x, xx=number_tuple.xx, + x0=number_tuple.x0, x_in_x0=number_tuple.x_in_x0, + number=str(number % 100)) + return s.format(x=number_tuple.x, xx=number_tuple.xx, + x0=number_tuple.x0, x_in_x0=number_tuple.x_in_x0, + number=str(number % 100)) + + def _number_format_hundreds(self, number, number_tuple, lang, + formatted_decade): + s = self._format_string(number % 1000, 'hundreds_format', lang) + hundreds = s.format(xxx=number_tuple.xxx, x00=number_tuple.x00, + x_in_x00=number_tuple.x_in_x00, + formatted_decade=formatted_decade, + number=str(number % 1000)) + return s.format(xxx=number_tuple.xxx, x00=number_tuple.x00, + x_in_x00=number_tuple.x_in_x00, + formatted_decade=formatted_decade, + number=str(number % 1000)) + + def _number_format_thousand(self, number, number_tuple, lang, + formatted_decade, formatted_hundreds): + s = self._format_string(number % 10000, 'thousand_format', lang) + return
s.format(x_in_x00=number_tuple.x_in_x00, + xx00=number_tuple.xx00, + xx_in_xx00=number_tuple.xx_in_xx00, + x000=number_tuple.x000, + x_in_x000=number_tuple.x_in_x000, + x0_in_x000=number_tuple.x0_in_x000, + x_in_0x00=number_tuple.x_in_0x00, + formatted_decade=formatted_decade, + formatted_hundreds=formatted_hundreds, + number=str(number % 10000)) + + def date_format(self, dt, lang, now): + format_str = 'date_full' + if now: + if dt.year == now.year: + format_str = 'date_full_no_year' + if dt.month == now.month and dt.day > now.day: + format_str = 'date_full_no_year_month' + + tomorrow = now + timedelta(days=1) + yesterday = now - timedelta(days=1) + if tomorrow.date() == dt.date(): + format_str = 'tomorrow' + elif now.date() == dt.date(): + format_str = 'today' + elif yesterday.date() == dt.date(): + format_str = 'yesterday' + + return self.lang_config[lang]['date_format'][format_str].format( + weekday=self.lang_config[lang]['weekday'][str(dt.weekday())], + month=self.lang_config[lang]['month'][str(dt.month)], + day=self.lang_config[lang]['date'][str(dt.day)], + formatted_year=self.year_format(dt, lang, False)) + + def date_time_format(self, dt, lang, now, use_24hour, use_ampm): + date_str = self.date_format(dt, lang, now) + time_str = nice_time(dt, lang, use_24hour=use_24hour, + use_ampm=use_ampm) + return self.lang_config[lang]['date_time_format']['date_time'].format( + formatted_date=date_str, formatted_time=time_str) + + def year_format(self, dt, lang, bc): + number_tuple = self._number_strings(dt.year, lang) + formatted_bc = ( + self.lang_config[lang]['year_format']['bc'] if bc else '') + formatted_decade = self._decade_format( + dt.year, number_tuple, lang) + formatted_hundreds = self._number_format_hundreds( + dt.year, number_tuple, lang, formatted_decade) + formatted_thousand = self._number_format_thousand( + dt.year, number_tuple, lang, formatted_decade, formatted_hundreds) + + s = self._format_string(dt.year, 'year_format', lang) +
return re.sub(' +', ' ', + s.format( + year=str(dt.year), + century=str(int(dt.year / 100)), + decade=str(dt.year % 100), + formatted_hundreds=formatted_hundreds, + formatted_decade=formatted_decade, + formatted_thousand=formatted_thousand, + bc=formatted_bc)).strip() + + +date_time_format = DateTimeFormat(os.path.join(os.path.dirname(__file__), 'res')) + + +def nice_date(dt, lang, now=None): + """ + Format a datetime to a pronounceable date + + For example, generates 'tuesday, june the fifth, 2018' + + Args: + dt (datetime): date to format (assumes already in local timezone) + lang (str, optional): an optional BCP-47 language code, if omitted + the default language will be used. + now (datetime): Current date. If provided, the returned date for speech + will be shortened accordingly: No year is returned if now is in the + same year as td, no month is returned if now is in the same month + as td. If now and td is the same day, 'today' is returned. + + Returns: + (str): The formatted date string + """ + full_code = standardize_lang_tag(lang) + date_time_format.cache(full_code) + + return date_time_format.date_format(dt, full_code, now) + + +def nice_date_time(dt, lang, now=None, use_24hour=False, + use_ampm=False): + """ + Format a datetime to a pronounceable date and time + + For example, generate 'tuesday, june the fifth, 2018 at five thirty' + + Args: + dt (datetime): date to format (assumes already in local timezone) + lang (str, optional): an optional BCP-47 language code, if omitted + the default language will be used. + now (datetime): Current date. If provided, the returned date for + speech will be shortened accordingly: No year is returned if + now is in the same year as td, no month is returned if now is + in the same month as td. If now and td is the same day, 'today' + is returned. 
+ use_24hour (bool): output in 24-hour/military or 12-hour format + use_ampm (bool): include the am/pm for 12-hour format + Returns: + (str): The formatted date time string + """ + + full_code = standardize_lang_tag(lang) + date_time_format.cache(full_code) + + return date_time_format.date_time_format(dt, full_code, now, use_24hour, + use_ampm) + + +def nice_day(dt, lang, date_format='DMY', include_month=True): + if include_month: + month = nice_month(dt, lang, date_format) + if date_format == 'MDY': + return "{} {}".format(month, dt.strftime("%d")) + else: + return "{} {}".format(dt.strftime("%d"), month) + return dt.strftime("%d") + + +def nice_weekday(dt, lang): + full_code = standardize_lang_tag(lang) + date_time_format.cache(full_code) + + if full_code in date_time_format.lang_config.keys(): + localized_day_names = list( + date_time_format.lang_config[full_code]['weekday'].values()) + weekday = localized_day_names[dt.weekday()] + else: + weekday = dt.strftime("%A") + return weekday.capitalize() + + +def nice_month(dt, lang, date_format='MDY'): + full_code = standardize_lang_tag(lang) + date_time_format.cache(full_code) + + if full_code in date_time_format.lang_config.keys(): + localized_month_names = date_time_format.lang_config[full_code]['month'] + month = localized_month_names[str(int(dt.strftime("%m")))] + else: + month = dt.strftime("%B") + return month.capitalize() + + +def nice_year(dt, lang, bc=False): + """ + Format a datetime to a pronounceable year + + For example, generate 'nineteen-hundred and eighty-four' for year 1984 + + Args: + dt (datetime): date to format (assumes already in local timezone) + lang (str, optional): an optional BCP-47 language code, if omitted + the default language will be used. + bc (bool): put B.C. after the year (python does not support dates + B.C.
in datetime) + Returns: + (str): The formatted year string + """ + + full_code = standardize_lang_tag(lang) + date_time_format.cache(full_code) + return date_time_format.year_format(dt, full_code, bc) + + +def get_date_strings(dt, lang, date_format='MDY', time_format="full"): + lang = standardize_lang_tag(lang) + timestr = nice_time(dt, lang, speech=False, + use_24hour=time_format == "full") + monthstr = nice_month(dt, lang, date_format) + weekdaystr = nice_weekday(dt, lang) + yearstr = dt.strftime("%Y") + daystr = nice_day(dt, lang, date_format, include_month=False) + if date_format == 'MDY': + dtstr = dt.strftime("%-m/%-d/%Y") + elif date_format == 'DMY': + dtstr = dt.strftime("%d/%-m/%-Y") + elif date_format == 'YMD': + dtstr = dt.strftime("%Y/%-m/%-d") + else: + raise ValueError("invalid date_format") + return { + "date_string": dtstr, + "time_string": timestr, + "month_string": monthstr, + "day_string": daystr, + 'year_string': yearstr, + "weekday_string": weekdaystr + } diff --git a/ovos_date_parser/dates_az.py b/ovos_date_parser/dates_az.py index 6fd7cb4..8e8884d 100644 --- a/ovos_date_parser/dates_az.py +++ b/ovos_date_parser/dates_az.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_az import pronounce_number_az, extract_number_az, _convert_words_to_numbers_az +from ovos_number_parser.numbers_az import pronounce_number_az, extract_number_az, numbers_to_digits_az from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local @@ -263,7 +263,7 @@ def extract_duration_az(text): } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}?(?:yə|a|ə)?(?:(?:\s|,)+)?(?Pyarım|0\.5)?(?:a)?"
- text = _convert_words_to_numbers_az(text) + text = numbers_to_digits_az(text) for unit_az in time_units_az: unit_pattern = pattern.format(unit=unit_az) @@ -312,7 +312,7 @@ def extract_datetime_az(text, anchorDate=None, default_time=None): def clean_string(s, word_list): # normalize and lowercase utt (replaces words with numbers) - s = _convert_words_to_numbers_az(s, ordinals=None) + s = numbers_to_digits_az(s, ordinals=None) # clean unneeded punctuation and capitalization among other things. s = s.lower().replace('?', '').replace('.', '').replace(',', '') diff --git a/ovos_date_parser/dates_cs.py b/ovos_date_parser/dates_cs.py index fa296c5..08d08f2 100644 --- a/ovos_date_parser/dates_cs.py +++ b/ovos_date_parser/dates_cs.py @@ -3,7 +3,7 @@ from dateutil.relativedelta import relativedelta from ovos_number_parser.numbers_cs import pronounce_number_cs, _ORDINAL_BASE_CS, extract_number_cs, \ - _convert_words_to_numbers_cs + numbers_to_digits_cs from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local @@ -175,7 +175,7 @@ def extract_duration_cs(text): } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}[ay]?" 
- text = _convert_words_to_numbers_cs(text) + text = numbers_to_digits_cs(text) for (unit_cs, unit_en) in _TIME_UNITS_CONVERSION.items(): unit_pattern = pattern.format(unit=unit_cs) diff --git a/ovos_date_parser/dates_de.py b/ovos_date_parser/dates_de.py index 26c2408..e3f69c8 100644 --- a/ovos_date_parser/dates_de.py +++ b/ovos_date_parser/dates_de.py @@ -3,7 +3,7 @@ from dateutil.relativedelta import relativedelta from ovos_number_parser.numbers_de import pronounce_number_de, _get_ordinal_index, is_number_de, is_numeric_de, \ - _convert_words_to_numbers_de + numbers_to_digits_de from ovos_utils.time import now_local @@ -110,7 +110,7 @@ def extract_duration_de(text): # Einzahl und Mehrzahl pattern = r"(?:^|\s)(?P\d+(?:[.,]?\d+)?\b)(?:\s+|\-)(?P{unit}[nes]?[sn]?\b)" - text = _convert_words_to_numbers_de(text) + text = numbers_to_digits_de(text) for (unit_en, unit_de) in time_units.items(): unit_pattern = pattern.format( @@ -140,7 +140,7 @@ def clean_string(s): for 12 hour date format """ - s = _convert_words_to_numbers_de(s) + s = numbers_to_digits_de(s) s = s.lower().replace('?', '').replace(' der ', ' ').replace(' den ', ' ') \ .replace(' an ', ' ').replace(' am ', ' ').replace(' auf ', ' ') \ .replace(' um ', ' ') diff --git a/ovos_date_parser/dates_en.py b/ovos_date_parser/dates_en.py index a3766f0..da76009 100644 --- a/ovos_date_parser/dates_en.py +++ b/ovos_date_parser/dates_en.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta, time from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_en import extract_number_en, _convert_words_to_numbers_en, pronounce_number_en +from ovos_number_parser.numbers_en import extract_number_en, numbers_to_digits_en, pronounce_number_en from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local, DAYS_IN_1_YEAR, DAYS_IN_1_MONTH @@ -134,7 +134,7 @@ def extract_duration_en(text): list(time_units.keys()) pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}s?" 
- text = _convert_words_to_numbers_en(text) + text = numbers_to_digits_en(text) text = text.replace("centuries", "century").replace("millenia", "millennium") for word in ('day', 'month', 'year', 'decade', 'century', 'millennium'): text = text.replace(f'a {word}', f'1 {word}') @@ -205,7 +205,7 @@ def extract_datetime_en(text, anchorDate=None, default_time=None): def clean_string(s): # normalize and lowercase utt (replaces words with numbers) - s = _convert_words_to_numbers_en(s, ordinals=None) + s = numbers_to_digits_en(s, ordinals=None) # clean unneeded punctuation and capitalization among other things. s = s.lower().replace('?', '').replace(',', '') \ .replace(' the ', ' ').replace(' a ', ' ').replace(' an ', ' ') \ diff --git a/ovos_date_parser/dates_es.py b/ovos_date_parser/dates_es.py index 76d1227..dfe0611 100644 --- a/ovos_date_parser/dates_es.py +++ b/ovos_date_parser/dates_es.py @@ -2,8 +2,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_es import pronounce_number_es -from ovos_number_parser.util import tokenize +from ovos_number_parser.numbers_es import pronounce_number_es, numbers_to_digits_es from ovos_utils.time import now_local, DAYS_IN_1_YEAR, DAYS_IN_1_MONTH @@ -964,28 +963,3 @@ def repl_non_std(match): duration = timedelta(**time_units) if any(time_units.values()) else None return (duration, text) - - -def numbers_to_digits_es(utterance: str) -> str: - """ - Replace written numbers in a Spanish text with their digit equivalents. - - Args: - utterance (str): Input string possibly containing written numbers. - - Returns: - str: Text with written numbers replaced by digits. 
- """ # TODO - standardize in ovos-number-parser - number_replacements = { - "uno": "1", "dos": "2", "tres": "3", "cuatro": "4", - "cinco": "5", "seis": "6", "siete": "7", "ocho": "8", "nueve": "9", - "diez": "10", "once": "11", "doce": "12", "trece": "13", "catorce": "14", - "quince": "15", "dieciséis": "16", "diecisiete": "17", "dieciocho": "18", - "diecinueve": "19", "veinte": "20" - # Extend this dictionary for higher numbers as needed - } - words = tokenize(utterance) - for idx, word in enumerate(words): - if word in number_replacements: - words[idx] = number_replacements[word] - return " ".join(words) diff --git a/ovos_date_parser/dates_nl.py b/ovos_date_parser/dates_nl.py index 06f25bf..47a4528 100644 --- a/ovos_date_parser/dates_nl.py +++ b/ovos_date_parser/dates_nl.py @@ -2,7 +2,7 @@ import re from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_nl import pronounce_number_nl, extract_number_nl, _convert_words_to_numbers_nl +from ovos_number_parser.numbers_nl import pronounce_number_nl, extract_number_nl, numbers_to_digits_nl from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local @@ -56,7 +56,7 @@ def extract_duration_nl(text): } pattern = r"(?P\d+(?:\.?\d+)?)\s+{unit}" - text = _convert_words_to_numbers_nl(text) + text = numbers_to_digits_nl(text) for unit in time_units: unit_nl_words = nl_translations[unit] diff --git a/ovos_date_parser/dates_pl.py b/ovos_date_parser/dates_pl.py index 1044512..735cf87 100644 --- a/ovos_date_parser/dates_pl.py +++ b/ovos_date_parser/dates_pl.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_pl import pronounce_number_pl, extract_number_pl, _convert_words_to_numbers_pl +from ovos_number_parser.numbers_pl import pronounce_number_pl, extract_number_pl, numbers_to_digits_pl from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local @@ -396,7 +396,7 @@ 
def extract_duration_pl(text): } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}[ayeę]?" - text = _convert_words_to_numbers_pl(text) + text = numbers_to_digits_pl(text) for unit in _TIME_UNITS_CONVERSION: unit_pattern = pattern.format(unit=unit) diff --git a/ovos_date_parser/dates_pt.py b/ovos_date_parser/dates_pt.py index fc6ea62..8a04a65 100644 --- a/ovos_date_parser/dates_pt.py +++ b/ovos_date_parser/dates_pt.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_pt import pronounce_number_pt +from ovos_number_parser.numbers_pt import pronounce_number_pt, numbers_to_digits_pt, _pt_pruning from ovos_utils.time import now_local, DAYS_IN_1_YEAR, DAYS_IN_1_MONTH @@ -988,122 +988,3 @@ def repl_non_std(match): return (duration, text) - -def numbers_to_digits_pt(utterance: str) -> str: - """ - Replace written numbers in text with their digit equivalents. - - Args: - utterance (str): Input string possibly containing written numbers. - - Returns: - str: Text with written numbers replaced by digits. 
- """ # TODO - standardize in ovos-number-parser - number_replacements = { - "catorze": "14", - "cem": "100", - "cento": "100", - "cinco": "5", - "cinquenta": "50", - "dez": "10", - "dezanove": "19", - "dezasseis": "16", - "dezassete": "17", - "dezoito": "18", - "dois": "2", - "doze": "12", - "duas": "2", - "duzentas": "200", - "duzentos": "200", - "mil": "1000", - "milhão": "1000000", - "nove": "9", - "novecentas": "900", - "novecentos": "900", - "noventa": "90", - "oitenta": "80", - "oito": "8", - "oitocentas": "800", - "oitocentos": "800", - "onze": "11", - "primeiro": "1", - "quarenta": "40", - "quatro": "4", - "quatrocentas": "400", - "quatrocentos": "400", - "quinhentas": "500", - "quinhentos": "500", - "quinze": "15", - "segundo": "2", - "seis": "6", - "seiscentas": "600", - "seiscentos": "600", - "sessenta": "60", - "sete": "7", - "setecentas": "700", - "setecentos": "700", - "setenta": "70", - "terceiro": "3", - "tres": "3", - "treze": "13", - "trezentas": "300", - "trezentos": "300", - "trinta": "30", - "três": "3", - "um": "1", - "uma": "1", - "vinte": "20", - "zero": "0" - } - words = tokenize(utterance) - for idx, word in enumerate(words): - if word in number_replacements: - words[idx] = number_replacements[word] - return " ".join(words) - - -def tokenize(utterance): - # Split things like 12% - utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance) - # Split things like #1 - utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance) - # Split things like amo-te - utterance = re.sub(r"([a-zA-Z]+)(-)([a-zA-Z]+\b)", r"\1 \2 \3", - utterance) - tokens = utterance.split() - if tokens[-1] == '-': - tokens = tokens[:-1] - - return tokens - - -def _pt_pruning(text, symbols=True, accents=True, agressive=True): - # agressive pt word pruning - words = ["a", "o", "os", "as", "de", "dos", "das", - "lhe", "lhes", "me", "e", "no", "nas", "na", "nos", "em", "para", - "este", - "esta", "deste", "desta", "neste", "nesta", "nesse", - "nessa", "foi", "que"] - if 
symbols: - symbols = [".", ",", ";", ":", "!", "?", "º", "ª"] - for symbol in symbols: - text = text.replace(symbol, "") - text = text.replace("-", " ").replace("_", " ") - if accents: - accents = {"a": ["á", "à", "ã", "â"], - "e": ["ê", "è", "é"], - "i": ["í", "ì"], - "o": ["ò", "ó"], - "u": ["ú", "ù"], - "c": ["ç"]} - for char in accents: - for acc in accents[char]: - text = text.replace(acc, char) - if agressive: - text_words = text.split(" ") - for idx, word in enumerate(text_words): - if word in words: - text_words[idx] = "" - text = " ".join(text_words) - text = ' '.join(text.split()) - return text diff --git a/ovos_date_parser/dates_ru.py b/ovos_date_parser/dates_ru.py index 96a842c..57bbc6b 100644 --- a/ovos_date_parser/dates_ru.py +++ b/ovos_date_parser/dates_ru.py @@ -3,7 +3,7 @@ from dateutil.relativedelta import relativedelta from ovos_number_parser.numbers_ru import pronounce_number_ru, _ORDINAL_BASE_RU, extract_number_ru, \ - _convert_words_to_numbers_ru + numbers_to_digits_ru from ovos_number_parser.util import is_numeric from ovos_utils.time import now_local @@ -270,7 +270,7 @@ def extract_duration_ru(text): } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}(?:а|ов|у|ут|уту)?" 
- text = _convert_words_to_numbers_ru(text) + text = numbers_to_digits_ru(text) for (unit_ru, unit_en) in _TIME_UNITS_CONVERSION.items(): unit_pattern = pattern.format(unit=unit_ru) diff --git a/ovos_date_parser/dates_uk.py b/ovos_date_parser/dates_uk.py index f092f45..977e5a8 100644 --- a/ovos_date_parser/dates_uk.py +++ b/ovos_date_parser/dates_uk.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from ovos_number_parser.numbers_uk import extract_number_uk, _convert_words_to_numbers_uk, _ORDINAL_BASE_UK, pronounce_number_uk, \ +from ovos_number_parser.numbers_uk import extract_number_uk, numbers_to_digits_uk, _ORDINAL_BASE_UK, pronounce_number_uk, \ _NUM_STRING_UK from ovos_number_parser.util import invert_dict, is_numeric from ovos_utils.time import now_local @@ -206,7 +206,7 @@ def extract_duration_uk(text): } pattern = r"(?P\d+(?:\.?\d+)?)(?:\s+|\-){unit}(?:ів|я|и|ин|і|унд|ни|ну|ку|дні|у|днів)?" - text = _convert_words_to_numbers_uk(text) + text = numbers_to_digits_uk(text) for (unit_uk, unit_en) in _TIME_UNITS_CONVERSION.items(): unit_pattern = pattern.format(unit=unit_uk) diff --git a/ovos_date_parser/res/az-az/date_time.json b/ovos_date_parser/res/az-az/date_time.json new file mode 100644 index 0000000..bf0c0e7 --- /dev/null +++ b/ovos_date_parser/res/az-az/date_time.json @@ -0,0 +1,130 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "yüz"}, + "2": {"match": "^[2-9]\\d{2}$", "format": "{x_in_x00} yüz"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d{2}$", "format": "min"}, + "2": {"match": "^[2-9]0\\d{2}$", "format": "{x_in_x000} min"}, + "3": {"match": "^11\\d{2}$", "format": "min yüz"}, + "4": {"match": 
"^1\\d{3}$", "format": "min {x_in_x00} yüz"}, + "5": {"match": "^[2-9]1\\d{2}$", "format": "{x_in_x000} min yüz"}, + "6": {"match": "^[2-9]\\d{3}$", "format": "{x_in_x000} min {x_in_x00} yüz"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d{1}$", "format": "{bc} {formatted_decade}"}, + "2": {"match": "^\\d{2}$", "format": "{bc} {formatted_decade}"}, + "3": {"match": "^\\d00$", "format": "{bc} {formatted_hundreds}"}, + "4": {"match": "^\\d{3}$", "format": "{bc} {formatted_hundreds} {formatted_decade}"}, + "5": {"match": "^\\d\\d00$", "format": "{bc} {formatted_thousand}"}, + "6": {"match": "^\\d{4}$", "format": "{bc} {formatted_thousand} {formatted_decade}"}, + "default": "{bc} {year}", + "bc": "e.ə." + }, + "date_format": { + "date_full": "{weekday}, {month} {day}, {formatted_year}", + "date_full_no_year": "{weekday}, {month} {day}", + "date_full_no_year_month": "{weekday}, ayın {day}", + "today": "bu gün", + "tomorrow": "sabah", + "yesterday": "dünən" + }, + "date_time_format": { + "date_time": "{formatted_date} {formatted_time}" + }, + "weekday": { + "0": "bazar ertəsi", + "1": "çərşənbə axşamı", + "2": "çərşənbə", + "3": "cümə axşamı", + "4": "cümə", + "5": "şənbə", + "6": "bazar" + }, + "date": { + "1": "biri", + "2": "ikisi", + "3": "üçü", + "4": "dördü", + "5": "beşi", + "6": "altısı", + "7": "yeddisi", + "8": "səkkizi", + "9": "doqquzu", + "10": "onu", + "11": "on biri", + "12": "on ikisi", + "13": "on üçü", + "14": "on dördü", + "15": "on beşi", + "16": "on altısı", + "17": "on yeddisi", + "18": "on səkkizi", + "19": "on doqquzu", + "20": "iyirmisi", + "21": "iyirmi biri", + "22": "iyirmi ikisi", + "23": "iyirmi üçü", + "24": "iyirmi dördü", + "25": "iyirmi beşi", + "26": "iyirmi altısı", + "27": "iyirmi yeddisi", + "28": "iyirmi səkkizi", + "29": "iyirmi doqquzu", + "30": "otuzu", + "31": "otuz biri" + }, + "month": { + "1": "yanvarın", + "2": "fevralın", + "3": "martın", + "4": "aprelin", + "5": "mayın", + "6": "iyunun", + "7": 
"iyulun", + "8": "avgustun", + "9": "sentyabrın", + "10": "oktyabrın", + "11": "noyabrın", + "12": "dekabrın" + }, + "number": { + "0": "sıfır", + "1": "bir", + "2": "iki", + "3": "üç", + "4": "dörd", + "5": "beş", + "6": "altı", + "7": "yeddi", + "8": "səkkiz", + "9": "doqquz", + "10": "on", + "11": "on bir", + "12": "on iki", + "13": "on üç", + "14": "on dörd", + "15": "on beş", + "16": "on altı", + "17": "on yeddi", + "18": "on səkkiz", + "19": "on doqquz", + "20": "iyirmi", + "30": "otuz", + "40": "qırx", + "50": "əlli", + "60": "altmış", + "70": "yetmiş", + "80": "səksən", + "90": "doxsan" + } +} diff --git a/ovos_date_parser/res/az-az/date_time_test.json b/ovos_date_parser/res/az-az/date_time_test.json new file mode 100644 index 0000000..d43f740 --- /dev/null +++ b/ovos_date_parser/res/az-az/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "e.ə. bir" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "e.ə. on" }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "e.ə. 
doxsan iki" }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "səkkiz yüz üç" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "səkkiz yüz on bir" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dörd yüz əlli dörd" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "min beş" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "min on iki" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "min qırx altı" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "min səkkiz yüz yeddi" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "min yeddi yüz on yeddi" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "min doqquz yüz səksən səkkiz"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "iki min doqquz"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "iki min on səkkiz"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "iki min iyirmi bir"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "iki min otuz"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "iki min yüz" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "min" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "iki min" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "e.ə. üç min yüz iyirmi" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "e.ə. 
üç min iki yüz qırx bir" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "beş min iki yüz" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "min yüz" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "iki min yüz" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "çərşənbə axşamı, yanvarın otuz biri, iki min on yeddi"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "bazar, fevralın dördü, iki min on səkkiz"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "bazar, fevralın dördü"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "bazar, ayın dördü"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "sabah"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "bu gün"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "dünən"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "bazar, fevralın dördü"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "bazar, fevralın dördü, iki min on səkkiz"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "çərşənbə axşamı, yanvarın otuz biri, iki min on yeddi gündüz ikiyə iyirmi iki dəqiqə işləyib"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "çərşənbə axşamı, yanvarın otuz biri, iki min on yeddi on üç iyirmi iki"} + } +} diff --git a/ovos_date_parser/res/ca-es/date_time.json b/ovos_date_parser/res/ca-es/date_time.json 
new file mode 100644 index 0000000..da75a05 --- /dev/null +++ b/ovos_date_parser/res/ca-es/date_time.json @@ -0,0 +1,130 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^2\\d$", "format": "vint-i-{x}"}, + "5": {"match": "^[3-9]\\d$", "format": "{x0}-{x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "{x_in_x00}-cent"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00}-cents"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d\\d$", "format": "mil"}, + "2": {"match": "^11\\d\\d$", "format": "mil cent"}, + "3": {"match": "^1[2-9]\\d\\d$", "format": "mil {x_in_x00}-cents"}, + "4": {"match": "^[2-9]0\\d{2}$", "format": "{x_in_x000} mil"}, + "5": {"match": "^[2-9]1\\d{2}$", "format": "{x_in_x000} mil cent"}, + "6": {"match": "^[2-9][2-9]\\d{2}$", "format": "{x_in_x000} mil {x_in_x00}-cents"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "a.C." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} de {month} de {formatted_year}", + "date_full_no_year": "{weekday}, {day} de {month}", + "date_full_no_year_month": "{weekday}, dia {day}", + "today": "avui", + "tomorrow": "demà", + "yesterday": "ahir" + }, + "date_time_format": { + "date_time": "{formatted_date} a {formatted_time}" + }, + "weekday": { + "0": "dilluns", + "1": "dimarts", + "2": "dimecres", + "3": "dijous", + "4": "divendres", + "5": "dissabte", + "6": "diumenge" + }, + "date": { + "1": "primer", + "2": "dos", + "3": "tres", + "4": "quatre", + "5": "cinc", + "6": "sis", + "7": "set", + "8": "vuit", + "9": "nou", + "10": "deu", + "11": "onze", + "12": "dotze", + "13": "tretze", + "14": "catorze", + "15": "quinze", + "16": "setze", + "17": "disset", + "18": "divuit", + "19": "dinou", + "20": "vint", + "21": "vint-i-u", + "22": "vint-i-dos", + "23": "vint-i-tres", + "24": "vint-i-quatre", + "25": "vint-i-cinc", + "26": "vint-i-sis", + "27": "vint-i-set", + "28": "vint-i-vuit", + "29": "vint-i-nou", + "30": "trenta", + "31": "trenta-u" + }, + "month": { + "1": "gener", + "2": "febrer", + "3": "març", + "4": "abril", + "5": "maig", + "6": "juny", + "7": "juliol", + "8": "agost", + "9": "setembre", + "10": "octubre", + "11": "novembre", + "12": "desembre" + }, + "number": { + "0": "zero", + "1": "u", + "2": "dos", + "3": "tres", + "4": "quatre", + "5": "cinc", + "6": "sis", + "7": "set", + "8": "vuit", + "9": "nou", + "10": "deu", + "11": "onze", + "12": "dotze", + "13": "tretze", + "14": "catorze", + "15": "quinze", + "16": "setze", + "17": "disset", + "18": "divuit", + "19": "dinou", + "20": "vint", + "30": "trenta", + "40": "quaranta", + "50": "cinquanta", + "60": "seixanta", + "70": "setanta", + "80": "vuitanta", + "90": "noranta" + } +} diff --git a/ovos_date_parser/res/ca-es/date_time_test.json b/ovos_date_parser/res/ca-es/date_time_test.json new file mode 100644 index 0000000..ea1087d --- /dev/null +++ 
b/ovos_date_parser/res/ca-es/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "u a.C." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deu a.C." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "noranta-dos a.C." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents tres" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vuit-cents onze" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quatre-cents cinquanta-quatre" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cinc" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil dotze" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil quaranta-sis" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil vuit-cents set" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil set-cents disset" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil nou-cents vuitanta-vuit"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil nou"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil divuit"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil vint-i-u"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos mil trenta"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mil" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dos 
mil" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil cent vint a.C." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tres mil dos-cents quaranta-u a.C." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinc mil dos-cents" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mil cent" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dos mil cent" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "dimarts, trenta-u de gener de dos mil disset"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "diumenge, dia quatre"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "demà"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "avui"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ahir"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "diumenge, quatre de febrer de dos mil divuit"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a la una i vint-i-dos de la tarda"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", 
"use_ampm": "False", "assertEqual": "dimarts, trenta-u de gener de dos mil disset a les tretze i vint-i-dos"} + } +} diff --git a/ovos_date_parser/res/cs-cz/date_time.json b/ovos_date_parser/res/cs-cz/date_time.json new file mode 100644 index 0000000..e4430fd --- /dev/null +++ b/ovos_date_parser/res/cs-cz/date_time.json @@ -0,0 +1,129 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{3}$", "format": "{x_in_x00} sto"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^\\d00\\d$", "format": "{x_in_x000} tisíc"}, + "2": {"match": "^1\\d00$", "format": "{xx_in_xx00} sto"}, + "3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} sto"}, + "4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"}, + "5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "7": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "b.c." 
+ }, + "date_format": { + "date_full": "{weekday}, {month} {day}, {formatted_year}", + "date_full_no_year": "{weekday}, {month} {day}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "dnes", + "tomorrow": "zítra", + "yesterday": "včera" + }, + "date_time_format": { + "date_time": "{formatted_date} v {formatted_time}" + }, + "weekday": { + "0": "pondělí", + "1": "úterý", + "2": "středa", + "3": "čtvrtek", + "4": "pátek", + "5": "sobota", + "6": "neděle" + }, + "date": { + "1": "prvního", + "2": "druhého", + "3": "třetího", + "4": "čtvrtého", + "5": "pátého", + "6": "šestého", + "7": "sedmého", + "8": "osmého", + "9": "devátého", + "10": "desátého", + "11": "jedenáctého", + "12": "dvanáctého", + "13": "třináctého", + "14": "čtrnáctého", + "15": "patnáctého", + "16": "šestnáctého", + "17": "sedmnáctého", + "18": "osmnáctého", + "19": "devatenáctého", + "20": "dvacátého", + "21": "dvacátého-prvního", + "22": "dvacátého-druhého", + "23": "dvacátého-třetího", + "24": "dvacátého-čtvrtého", + "25": "dvacátého-pátého", + "26": "dvacátého-šestého", + "27": "dvacátého-sedmého", + "28": "dvacátého-osmého", + "29": "dvacátého-devátého", + "30": "třicátého", + "31": "třicátého-prvního" + }, + "month": { + "1": "leden", + "2": "únor", + "3": "březen", + "4": "duben", + "5": "květen", + "6": "červen", + "7": "červenec", + "8": "srpen", + "9": "září", + "10": "říjen", + "11": "listopad", + "12": "prosinec" + }, + "number": { + "0": "nula", + "1": "jedna", + "2": "dva", + "3": "tři", + "4": "čtyři", + "5": "pět", + "6": "šest", + "7": "sedm", + "8": "osm", + "9": "devět", + "10": "deset", + "11": "jedenáct", + "12": "dvanáct", + "13": "třináct", + "14": "čtrnáct", + "15": "patnáct", + "16": "šestnáct", + "17": "sedmnáct", + "18": "osmnáct", + "19": "devatenáct", + "20": "dvacet", + "30": "třicet", + "40": "čtyřicet", + "50": "padesát", + "60": "šedesát", + "70": "sedmdesát", + "80": "osmdesát", + "90": "devadesát" + } +} diff --git 
a/ovos_date_parser/res/cs-cz/date_time_test.json b/ovos_date_parser/res/cs-cz/date_time_test.json new file mode 100644 index 0000000..97459e6 --- /dev/null +++ b/ovos_date_parser/res/cs-cz/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "jedna b.c." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deset b.c." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "devadesát dva b.c." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto tři" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osm sto jedenáct" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "čtyři sto padesát čtyři" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedna tisíc pět" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset dvanáct" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deset čtyřicet šest" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osmnáct sedm" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "sedmnáct sedmnáct" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "devatenáct osmdesát osm"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc devět"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet osmnáct"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet dvacet jedna"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dvacet třicet"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" }, + "18": 
{"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "jedna tisíc" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisíc" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet jedna dvacet b.c." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "třicet dva čtyřicet jedna b.c." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "padesát dva sto" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "jedenáct sto" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dvacet jedna sto" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "neděle, čtvrtého"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "zítra"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "dnes"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "včera"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "neděle, únor čtvrtého, dvacet osmnáct"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "úterý, leden 
třicátého-prvního, dvacet sedmnáct v jedna dvacet dva p.m."}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "úterý, leden třicátého-prvního, dvacet sedmnáct v třináct dvacet dva"} + } +} diff --git a/ovos_date_parser/res/da-dk/date_time.json b/ovos_date_parser/res/da-dk/date_time.json new file mode 100644 index 0000000..390f620 --- /dev/null +++ b/ovos_date_parser/res/da-dk/date_time.json @@ -0,0 +1,132 @@ +{ + "decade_format": { + "1": {"match": "^1$", "format": "et"}, + "2": {"match": "^\\d$", "format": "{x}"}, + "3": {"match": "^1\\d$", "format": "{xx}"}, + "4": {"match": "^\\d0$", "format": "{x0}"}, + "5": {"match": "^[2-9]\\d$", "format": "{x} og {x0}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "et hundred"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00} hundred"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1[1-9]\\d{2}$", "format": "{xx_in_xx00} hundred"}, + "2": {"match": "^1\\d{3}$", "format": "et tusind"}, + "3": {"match": "^\\d{4}$", "format": "{x_in_x000} tusind"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} og {formatted_decade} {bc}"}, + "4": {"match": "^(1\\d00)|([2-9]000)$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{formatted_thousand} og {formatted_decade} {bc}"}, + "6": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} og {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "f.kr." 
+ }, + "date_format": { + "date_full": "{weekday}, den {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, den {day} {month}", + "date_full_no_year_month": "{weekday}, den {day}", + "today": "i dag", + "tomorrow": "i morgen", + "yesterday": "i går" + }, + "date_time_format": { + "date_time": "{formatted_date} klokken {formatted_time}" + }, + "weekday": { + "0": "mandag", + "1": "tirsdag", + "2": "onsdag", + "3": "torsdag", + "4": "fredag", + "5": "lørdag", + "6": "søndag" + }, + "date": { + "1": "første", + "2": "anden", + "3": "tredie", + "4": "fjerde", + "5": "femte", + "6": "sjette", + "7": "syvende", + "8": "ottende", + "9": "ninende", + "10": "tiende", + "11": "elvte", + "12": "tolvte", + "13": "trettende", + "14": "fjortende", + "15": "femtende", + "16": "sekstende", + "17": "syttende", + "18": "attende", + "19": "nittende", + "20": "tyvende", + "21": "en og tyvende", + "22": "to og tyvende", + "23": "tre og tyvende", + "24": "fire og tyvende", + "25": "fem og tyvende", + "26": "seks og tyvende", + "27": "syv og tyvende", + "28": "otte og tyvende", + "29": "ni og tyvende", + "30": "tredivte", + "31": "en og tredivte" + }, + "month": { + "1": "januar", + "2": "februar", + "3": "marts", + "4": "april", + "5": "maj", + "6": "juni", + "7": "juli", + "8": "august", + "9": "september", + "10": "oktober", + "11": "november", + "12": "december" + }, + "number": { + "0": "nul", + "1": "en", + "2": "to", + "3": "tre", + "4": "fire", + "5": "fem", + "6": "seks", + "7": "syv", + "8": "otte", + "9": "ni", + "10": "ti", + "11": "elve", + "12": "tolv", + "13": "tretten", + "14": "fjorten", + "15": "femten", + "16": "seksten", + "17": "sytten", + "18": "atten", + "19": "nitten", + "20": "tyve", + "30": "tredive", + "40": "fyrre", + "50": "halvtreds", + "60": "treds", + "70": "halvfjerds", + "80": "firs", + "90": "halvfems", + "100": "hundrede", + "1000": "tusind", + "2000": "to tusind" + } + +} diff --git a/ovos_date_parser/res/da-dk/date_time_test.json 
b/ovos_date_parser/res/da-dk/date_time_test.json new file mode 100644 index 0000000..7705399 --- /dev/null +++ b/ovos_date_parser/res/da-dk/date_time_test.json @@ -0,0 +1,32 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind og sytten"}, + "2": {"datetime_param": "1984, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og fire og firs"}, + "3": {"datetime_param": "1906, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitten hundred og seks"}, + "4": {"datetime_param": "1802, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred og to" }, + "5": {"datetime_param": "806, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "otte hundred og seks" }, + "6": {"datetime_param": "1800, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "atten hundred" }, + "7": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et" }, + "8": {"datetime_param": "103, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et hundred og tre" }, + "9": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "et tusind" }, + "10": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "to tusind" }, + "11": {"datetime_param": "99, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ni og halvfems f.kr." }, + "12": {"datetime_param": "5, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "fem f.kr." }, + "13": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind et hundred og tyve f.kr." }, + "14": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tre tusind to hundred og en og fyrre f.kr." 
} + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "søndag, den fjerde"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "i morgen"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "i dag"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "i går"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "søndag, den fjerde februar, to tusind og atten"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tirsdag, den en og tredivte januar, to tusind og sytten klokken et toogtyve om eftermiddagen"} + } +} \ No newline at end of file diff --git a/ovos_date_parser/res/de-de/date_time.json b/ovos_date_parser/res/de-de/date_time.json new file mode 100644 index 0000000..8a5aace --- /dev/null +++ b/ovos_date_parser/res/de-de/date_time.json @@ -0,0 +1,136 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x} und {x0}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "hundert"}, + "2": {"match": 
"^\\d{3}$", "format": "{x_in_x00} hundert"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d\\d$", "format": "tausend"}, + "2": {"match": "^\\d0\\d{2}$", "format": "{x_in_x000} tausend"}, + "3": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundert"}, + "4": {"match": "^\\d{2}00$", "format": "{x_in_x000} tausend {x_in_x00} hundert"}, + "5": {"match": "^\\d0\\d\\d$", "format": "{x_in_x000} tausend"}, + "6": {"match": "^1\\d{3}$", "format": "{xx_in_xx00}"}, + "7": {"match": "^\\d{4}$", "format": "{x_in_x000} tausend {x_in_x00} hundert"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^1$", "format": "eins {bc}"}, + "2": {"match": "^\\d{1}?$", "format": "{formatted_decade} {bc}"}, + "3": {"match": "^\\d{2}?$", "format": "{formatted_decade} {bc}"}, + "4": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "5": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "7": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "8": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"}, + "9": {"match": "^1[2-9]\\d{2}$", "format": "{formatted_thousand} hundert {formatted_decade} {bc}"}, + "10": {"match": "^1\\d{3}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "11": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "v.d.Z." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "heute", + "tomorrow": "morgen", + "yesterday": "gestern" + }, + "date_time_format": { + "date_time": "{formatted_date} um {formatted_time}" + }, + "weekday": { + "0": "Montag", + "1": "Dienstag", + "2": "Mittwoch", + "3": "Donnerstag", + "4": "Freitag", + "5": "Samstag", + "6": "Sonntag" + }, + "date": { + "1": "erster", + "2": "zweiter", + "3": "dritter", + "4": "vierter", + "5": "fünfter", + "6": "sechster", + "7": "siebter", + "8": "achter", + "9": "neunter", + "10": "zehnter", + "11": "elfter", + "12": "zwölfter", + "13": "dreizehnter", + "14": "vierzehnter", + "15": "fünfzehnter", + "16": "sechzehnter", + "17": "siebzehnter", + "18": "achtzehnter", + "19": "neunzehnter", + "20": "zwanzigster", + "21": "einundzwanzigster", + "22": "zweiundzwanzigster", + "23": "dreiundzwanzigster", + "24": "vierundzwanzigster", + "25": "fünfundzwanzigster", + "26": "sechsundzwanzigster", + "27": "siebenundzwanzigster", + "28": "achtundzwanzigster", + "29": "neunundzwanzigster", + "30": "dreißigster", + "31": "einunddreißigster" + }, + "month": { + "1": "Januar", + "2": "Februar", + "3": "März", + "4": "April", + "5": "Mai", + "6": "Juni", + "7": "Juli", + "8": "August", + "9": "September", + "10": "Oktober", + "11": "November", + "12": "Dezember" + }, + "number": { + "0": "null", + "1": "ein", + "2": "zwei", + "3": "drei", + "4": "vier", + "5": "fünf", + "6": "sechs", + "7": "sieben", + "8": "acht", + "9": "neun", + "10": "zehn", + "11": "elf", + "12": "zwölf", + "13": "dreizehn", + "14": "vierzehn", + "15": "fünfzehn", + "16": "sechzehn", + "17": "siebzehn", + "18": "achtzehn", + "19": "neunzehn", + "20": "zwanzig", + "30": "dreißig", + "40": "vierzig", + "50": "fünfzig", + "60": "sechzig", + "70": "siebzig", + "80": "achtzig", + "90": "neunzig" + } +} diff --git 
a/ovos_date_parser/res/de-de/date_time_test.json b/ovos_date_parser/res/de-de/date_time_test.json new file mode 100644 index 0000000..fe33bcd --- /dev/null +++ b/ovos_date_parser/res/de-de/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "eins v.d.Z." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zehn v.d.Z." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "zwei und neunzig v.d.Z." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert drei" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht hundert elf" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vier hundert vier und fünfzig" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend fünf" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend zwölf" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tausend sechs und vierzig" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "achtzehn hundert sieben" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "siebzehn hundert siebzehn" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "neunzehn hundert acht und achtzig"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend neun"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend achtzehn"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend ein und zwanzig"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend dreißig"}, + "17": {"datetime_param": "2100, 1, 
31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tausend" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zwei tausend" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend ein hundert zwanzig v.d.Z." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "drei tausend zwei hundert ein und vierzig v.d.Z." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "fünf tausend zwei hundert" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elf hundert" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "zwei tausend ein hundert" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "Sonntag, vierter"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "morgen"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "heute"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "gestern"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "Sonntag, vierter Februar, zwei tausend achtzehn"} + 
}, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um ein uhr zweiundzwanzig nachmittags"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "Dienstag, einunddreißigster Januar, zwei tausend siebzehn um dreizehn uhr zweiundzwanzig"} + } +} diff --git a/ovos_date_parser/res/en-us/date_time.json b/ovos_date_parser/res/en-us/date_time.json new file mode 100644 index 0000000..c9ca605 --- /dev/null +++ b/ovos_date_parser/res/en-us/date_time.json @@ -0,0 +1,129 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{3}$", "format": "{x_in_x00} hundred"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^\\d00\\d$", "format": "{x_in_x000} thousand"}, + "2": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundred"}, + "3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} hundred"}, + "4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"}, + "5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} oh {formatted_decade} {bc}"}, + "7": {"match": "^\\d{4}$", "format": 
"{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "b.c." + }, + "date_format": { + "date_full": "{weekday}, {month} {day}, {formatted_year}", + "date_full_no_year": "{weekday}, {month} {day}", + "date_full_no_year_month": "{weekday}, the {day}", + "today": "today", + "tomorrow": "tomorrow", + "yesterday": "yesterday" + }, + "date_time_format": { + "date_time": "{formatted_date} at {formatted_time}" + }, + "weekday": { + "0": "monday", + "1": "tuesday", + "2": "wednesday", + "3": "thursday", + "4": "friday", + "5": "saturday", + "6": "sunday" + }, + "date": { + "1": "first", + "2": "second", + "3": "third", + "4": "fourth", + "5": "fifth", + "6": "sixth", + "7": "seventh", + "8": "eighth", + "9": "ninth", + "10": "tenth", + "11": "eleventh", + "12": "twelfth", + "13": "thirteenth", + "14": "fourteenth", + "15": "fifteenth", + "16": "sixteenth", + "17": "seventeenth", + "18": "eighteenth", + "19": "nineteenth", + "20": "twentieth", + "21": "twenty-first", + "22": "twenty-second", + "23": "twenty-third", + "24": "twenty-fourth", + "25": "twenty-fifth", + "26": "twenty-sixth", + "27": "twenty-seventh", + "28": "twenty-eighth", + "29": "twenty-ninth", + "30": "thirtieth", + "31": "thirty-first" + }, + "month": { + "1": "january", + "2": "february", + "3": "march", + "4": "april", + "5": "may", + "6": "june", + "7": "july", + "8": "august", + "9": "september", + "10": "october", + "11": "november", + "12": "december" + }, + "number": { + "0": "zero", + "1": "one", + "2": "two", + "3": "three", + "4": "four", + "5": "five", + "6": "six", + "7": "seven", + "8": "eight", + "9": "nine", + "10": "ten", + "11": "eleven", + "12": "twelve", + "13": "thirteen", + "14": "fourteen", + "15": "fifteen", + "16": "sixteen", + "17": "seventeen", + "18": "eighteen", + "19": "nineteen", + "20": "twenty", + "30": "thirty", + "40": "forty", + "50": "fifty", + "60": "sixty", + "70": "seventy", + "80": "eighty", + "90": "ninety" + } +} diff --git 
a/ovos_date_parser/res/en-us/date_time_test.json b/ovos_date_parser/res/en-us/date_time_test.json new file mode 100644 index 0000000..ffde77b --- /dev/null +++ b/ovos_date_parser/res/en-us/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "one b.c." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ten b.c." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ninety two b.c." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "eight hundred three" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "eight hundred eleven" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "four hundred fifty four" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "one thousand five" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ten twelve" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ten forty six" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "eighteen oh seven" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "seventeen seventeen" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nineteen eighty eight"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "two thousand nine"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twenty eighteen"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twenty twenty one"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twenty thirty"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "twenty one hundred" }, + 
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "one thousand" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "two thousand" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "thirty one twenty b.c." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "thirty two forty one b.c." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "fifty two hundred" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "eleven hundred" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "twenty one hundred" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "tuesday, january thirty-first, twenty seventeen"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "sunday, february fourth, twenty eighteen"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "sunday, february fourth"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "sunday, the fourth"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "tomorrow"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "today"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "yesterday"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "sunday, february fourth"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "sunday, february fourth, twenty eighteen"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", 
"assertEqual": "tuesday, january thirty-first, twenty seventeen at one twenty two p.m."}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "tuesday, january thirty-first, twenty seventeen at thirteen twenty two"} + } +} diff --git a/ovos_date_parser/res/eu-eu/date_time.json b/ovos_date_parser/res/eu-eu/date_time.json new file mode 100644 index 0000000..69f176b --- /dev/null +++ b/ovos_date_parser/res/eu-eu/date_time.json @@ -0,0 +1,112 @@ +{ + "decade_format": { + "default": "{number}" + }, + "hundreds_format": { + "default": "{number}" + }, + "thousand_format": { + "default": "{number}" + }, + "year_format": { + "default": "{year} {bc}", + "bc": "k.a." + }, + "date_format": { + "date_full": "{weekday}, {formatted_year}ko {month}ren {day}", + "date_full_no_year": "{weekday}, {month}ren {day}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "gaur", + "tomorrow": "bihar", + "yesterday": "atzo" + }, + "date_time_format": { + "date_time": "{formatted_date} {formatted_time}tan" + }, + "weekday": { + "0": "astelehena", + "1": "asteartea", + "2": "asteazkena", + "3": "osteguna", + "4": "ostirala", + "5": "larunbata", + "6": "igandea" + }, + "date": { + "1": "bat", + "2": "bi", + "3": "hiru", + "4": "lau", + "5": "bost", + "6": "sei", + "7": "zazpi", + "8": "zortzi", + "9": "bederatzi", + "10": "hamar", + "11": "hamaika", + "12": "hamabi", + "13": "hamahiru", + "14": "hamalau", + "15": "hamabost", + "16": "hamasei", + "17": "hamazazpi", + "18": "hemezortzi", + "19": "hemeretzi", + "20": "hogei", + "21": "hogeita bat", + "22": "hogeita bi", + "23": "hogeita hiru", + "24": "hogeita lau", + "25": "hogeita bost", + "26": "hogeita sei", + "27": "hogeita zazpi", + "28": "hogeita zortzi", + "29": "hogeita bederatzi", + "30": "hogeita hamar", + "31": "hogeita hamaika" + }, + "month": { + "1": "urtarrila", + "2": "otsaila", + "3": "martxoa", + "4": "apirila", + "5": "maiatza", + "6": "ekaina", + 
"7": "uztaila", + "8": "abuztua", + "9": "iraila", + "10": "urria", + "11": "azaroa", + "12": "abendua" + }, + "number": { + "0": "zero", + "1": "bat", + "2": "bi", + "3": "hiru", + "4": "lau", + "5": "bost", + "6": "sei", + "7": "zazpi", + "8": "zortzi", + "9": "bederatzi", + "10": "hamar", + "11": "hamaika", + "12": "hamabi", + "13": "hamahiru", + "14": "hamalau", + "15": "hamabost", + "16": "hamasei", + "17": "hamazazpi", + "18": "hemezortzi", + "19": "hemeretzi", + "20": "hogei", + "30": "hogeita hamar", + "40": "berrogei", + "50": "berrogeita hamar", + "60": "hirurogei", + "70": "hirurogeita hamar", + "80": "laurogei", + "90": "laurogeita hamar" + } +} diff --git a/ovos_date_parser/res/fa-ir/date_time.json b/ovos_date_parser/res/fa-ir/date_time.json new file mode 100644 index 0000000..1a43989 --- /dev/null +++ b/ovos_date_parser/res/fa-ir/date_time.json @@ -0,0 +1,180 @@ +{ + "decade_format": { + "1": { + "match": "^\\d$", + "format": "{x}" + }, + "2": { + "match": "^1\\d$", + "format": "{xx}" + }, + "3": { + "match": "^\\d0$", + "format": "{x0}" + }, + "4": { + "match": "^[2-9]\\d$", + "format": "{x0} {x}" + }, + "default": "{number}" + }, + "hundreds_format": { + "1": { + "match": "^\\d{3}$", + "format": "{x_in_x00} hundred" + }, + "default": "{number}" + }, + "thousand_format": { + "1": { + "match": "^\\d00\\d$", + "format": "{x_in_x000} thousand" + }, + "2": { + "match": "^1\\d00$", + "format": "{xx_in_xx00} hundred" + }, + "3": { + "match": "^\\d{2}00$", + "format": "{x0_in_x000} {x_in_x00} hundred" + }, + "4": { + "match": "^(1\\d{3})|(\\d0\\d{2})$", + "format": "{xx_in_xx00}" + }, + "5": { + "match": "^\\d{4}$", + "format": "{x0_in_x000} {x_in_x00}" + }, + "default": "{number}" + }, + "year_format": { + "1": { + "match": "^\\d\\d?$", + "format": "{formatted_decade} {bc}" + }, + "2": { + "match": "^\\d00$", + "format": "{formatted_hundreds} {bc}" + }, + "3": { + "match": "^\\d{3}$", + "format": "{formatted_hundreds} {formatted_decade} {bc}" + }, + "4": { 
+ "match": "^\\d{2}00$", + "format": "{formatted_thousand} {bc}" + }, + "5": { + "match": "^\\d00\\d$", + "format": "{formatted_thousand} {formatted_decade} {bc}" + }, + "6": { + "match": "^\\d{2}0\\d$", + "format": "{formatted_thousand} {formatted_decade} {bc}" + }, + "7": { + "match": "^\\d{4}$", + "format": "{formatted_thousand} {formatted_decade} {bc}" + }, + "default": "{year} {bc}", + "bc": "بعد از میلاد" + }, + "date_format": { + "date_full": "{weekday}, {day} {month} {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "امروز", + "tomorrow": "فردا", + "yesterday": "دیروز" + }, + "date_time_format": { + "date_time": "{formatted_date} ساعت {formatted_time}" + }, + "weekday": { + "0": "دوشنبه", + "1": "سه شنبه", + "2": "چهارشنبه", + "3": "پنج شنبه", + "4": "جمعه", + "5": "شنبه", + "6": "یکشنبه" + }, + "date": { + "1": "یکم", + "2": "دوم", + "3": "سوم", + "4": "چهارم", + "5": "پنجم", + "6": "ششم", + "7": "هفتم", + "8": "هشتم", + "9": "نهم", + "10": "دهم", + "11": "یازدهم", + "12": "دوازدهم", + "13": "سیزدهم", + "14": "چهاردهم", + "15": "پونزدهم", + "16": "شونزدهم", + "17": "هیفدهم", + "18": "هیجدهم", + "19": "نوزدهم", + "20": "بیستم", + "21": "بیست و یکم", + "22": "بیست و دوم", + "23": "بیست و سوم", + "24": "بیست و چهارم", + "25": "بیست و پنجم", + "26": "بیست و ششم", + "27": "بیست و هفتم", + "28": "بیست و هشتم", + "29": "بیست و نهم", + "30": "سیم", + "31": "سی و یکم" + }, + "month": { + "1": "ژانویه", + "2": "فوریه", + "3": "مارس", + "4": "آوریل", + "5": "مه", + "6": "جون", + "7": "جولای", + "8": "آگوست", + "9": "سپتامبر", + "10": "اکتبر", + "11": "نوامبر", + "12": "دسامبر" + }, + "number": { + "0": "صفر", + "1": "یک", + "2": "دو", + "3": "سه", + "4": "چهار", + "5": "پنج", + "6": "شش", + "7": "هفت", + "8": "هشت", + "9": "نه", + "10": "ده", + "11": "یازده", + "12": "دوازده", + "13": "سیزده", + "14": "چهارده", + "15": "پونزده", + "16": "شونزده", + "17": "هیفده", + "18": "هیجده", + "19": 
"نوزده", + "20": "بیست", + "30": "سی", + "40": "چهل", + "50": "پنجاه", + "60": "شصت", + "70": "هفتاد", + "80": "هشتاد", + "90": "نود" + } +} diff --git a/ovos_date_parser/res/fa-ir/date_time_test.json b/ovos_date_parser/res/fa-ir/date_time_test.json new file mode 100644 index 0000000..72321e3 --- /dev/null +++ b/ovos_date_parser/res/fa-ir/date_time_test.json @@ -0,0 +1,36 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "یک بعد از میلاد" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ده بعد از میلاد" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ده دوازده" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ده چهل و شش" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "هیجده صفر هفت" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "هیفده هیفده" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "نوزده هشتاد و هشت"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "دو هزار و نه"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "بیست هیجده"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "بیست بیست و یک"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "بیست سی"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "دو هزار و صد" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "هزار" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "دو هزار" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "سی و یک بیست بعد از میلاد" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "سی و دو چهل و یک بعد 
از میلاد" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "پنجاه و دو هزار" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "سه شنبه, سی و یکم ژانویه بیست هیفده"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "یکشنبه, چهارم فوریه بیست هیجده"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "یکشنبه, چهارم فوریه"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "یکشنبه, چهارم"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "فردا"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "امروز"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "دیروز"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "یکشنبه, چهارم فوریه"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "یکشنبه, چهارم فوریه بیست هیجده"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "سه شنبه, سی و یکم ژانویه بیست هیفده ساعت یک و بیست و دو دقیقه بعد از ظهر"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "سه شنبه, سی و یکم ژانویه بیست هیفده ساعت سیزده و بیست و دو دقیقه"} + } +} diff --git a/ovos_date_parser/res/fr-fr/date_time.json b/ovos_date_parser/res/fr-fr/date_time.json new file mode 100644 index 0000000..4344920 --- /dev/null +++ b/ovos_date_parser/res/fr-fr/date_time.json @@ -0,0 +1,147 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^\\d0$", "format": "{x0}"}, + "3": {"match": "^[2-6]1$", "format": 
"{x0}-et-un"}, + "4": {"match": "^[2-6|8]\\d$", "format": "{x0}-{x}"}, + "5": {"match": "^\\d{2}$", "format": "{xx}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{1,2}$", "format": "{formatted_decade}"}, + "2": {"match": "^100$", "format": "cent"}, + "3": {"match": "^\\d00$", "format": "{x_in_x00}-cents"}, + "4": {"match": "^1\\d{2}$", "format": "cent-{formatted_decade}"}, + "5": {"match": "^\\d{3}$", "format": "{x_in_x00}-cent-{formatted_decade}"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1000$", "format": "mille"}, + "2": {"match": "^\\d000$", "format": "{x_in_x000}-mille"}, + "3": {"match": "^1\\d{3}$", "format": "mille-{formatted_hundreds}"}, + "4": {"match": "^\\d{4}$", "format": "{x_in_x000}-mille-{formatted_hundreds}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^[1-9]\\d{3}$", "format": "{formatted_thousand} {bc}"}, + "default": "{year} {bc}", + "bc": "avant Jésus Christ " + }, + "date_format": { + "date_full": "{weekday} {day} {month} {formatted_year}", + "date_full_no_year": "{weekday} {day} {month}", + "date_full_no_year_month": "{weekday} {day}", + "today": "aujourd'hui", + "tomorrow": "demain", + "yesterday": "hier" + }, + "date_time_format": { + "date_time": "{formatted_date} {formatted_time}" + }, + "weekday": { + "0": "lundi", + "1": "mardi", + "2": "mercredi", + "3": "jeudi", + "4": "vendredi", + "5": "samedi", + "6": "dimanche" + }, + "date": { + "1": "premier", + "2": "deux", + "3": "trois", + "4": "quatre", + "5": "cinq", + "6": "six", + "7": "sept", + "8": "huit", + "9": "neuf", + "10": "dix", + "11": "onze", + "12": "douze", + "13": "treize", + "14": "quatorze", + "15": "quinze", + "16": "seize", + "17": "dix-sept", + "18": "dix-huit", + "19": "dix-neuf", + "20": "vingt", + "21": "vingt-et-un", + "22": "vingt-deux", + "23": 
"vingt-trois", + "24": "vingt-quatre", + "25": "vingt-cinq", + "26": "vingt-six", + "27": "vingt-sept", + "28": "vingt-huit", + "29": "vingt-neuf", + "30": "trente", + "31": "trente-et-un" + }, + "month": { + "1": "janvier", + "2": "février", + "3": "mars", + "4": "avril", + "5": "mai", + "6": "juin", + "7": "juillet", + "8": "août", + "9": "septembre", + "10": "octobre", + "11": "novembre", + "12": "décembre" + }, + "number": { + "0": "zéro", + "1": "un", + "2": "deux", + "3": "trois", + "4": "quatre", + "5": "cinq", + "6": "six", + "7": "sept", + "8": "huit", + "9": "neuf", + "10": "dix", + "11": "onze", + "12": "douze", + "13": "treize", + "14": "quatorze", + "15": "quinze", + "16": "seize", + "17": "dix-sept", + "18": "dix-huit", + "19": "dix-neuf", + "20": "vingt", + "30": "trente", + "40": "quarante", + "50": "cinquante", + "60": "soixante", + "70": "soixante-dix", + "71": "soixante-et-onze", + "72": "soixante-douze", + "73": "soixante-treize", + "74": "soixante-quatorze", + "75": "soixante-quinze", + "76": "soixante-seize", + "77": "soixante-dix-sept", + "78": "soixante-dix-huit", + "79": "soixante-dix-neuf", + "80": "quatre-vingt", + "90": "quatre-vingt-dix", + "91": "quatre-vingt-onze", + "92": "quatre-vingt-douze", + "93": "quatre-vingt-treize", + "94": "quatre-vingt-quatorze", + "95": "quatre-vingt-quinze", + "96": "quatre-vingt-seize", + "97": "quatre-vingt-dix-sept", + "98": "quatre-vingt-dix-huit", + "99": "quatre-vingt-dix-neuf" + } +} diff --git a/ovos_date_parser/res/fr-fr/date_time_test.json b/ovos_date_parser/res/fr-fr/date_time_test.json new file mode 100644 index 0000000..448f219 --- /dev/null +++ b/ovos_date_parser/res/fr-fr/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "un avant Jésus Christ" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "dix avant Jésus Christ" }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", 
"bc": "True", "assertEqual": "quatre-vingt-douze avant Jésus Christ" }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "huit-cent-trois" }, + "5": {"datetime_param": "111, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "cent-onze" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quatre-cent-cinquante-quatre" }, + "7": {"datetime_param": "2005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deux-mille-cinq" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille-douze" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille-quarante-six" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille-huit-cent-sept" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille-sept-cent-dix-sept" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille-neuf-cent-quatre-vingt-huit"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux-mille-neuf"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux-mille-dix-huit"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux-mille-vingt-et-un"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux-mille-trente"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deux-mille-cent" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "deux-mille" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trois-mille-cent-vingt avant Jésus Christ" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trois-mille-deux-cent-quarante-et-un 
avant Jésus Christ" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinq-mille-deux-cents" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille-cent" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "deux-mille-cent" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "mardi trente-et-un janvier deux-mille-dix-sept"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "dimanche quatre février deux-mille-dix-huit"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "dimanche quatre février"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "dimanche quatre"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "demain"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "aujourd'hui"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "hier"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "dimanche quatre février"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "dimanche quatre février deux-mille-dix-huit"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "mardi trente-et-un janvier deux-mille-dix-sept une heure vingt-deux de l'après-midi"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "mardi trente-et-un janvier deux-mille-dix-sept treize heures vingt-deux"} + } +} diff --git a/ovos_date_parser/res/hu-hu/date_time.json 
b/ovos_date_parser/res/hu-hu/date_time.json new file mode 100644 index 0000000..9dcea53 --- /dev/null +++ b/ovos_date_parser/res/hu-hu/date_time.json @@ -0,0 +1,132 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^20$", "format": "húsz"}, + "4": {"match": "^\\d0$", "format": "{x0}"}, + "5": {"match": "^[2-9]\\d$", "format": "{x0}{x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^2\\d{2}$", "format": "kétszáz"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00}száz"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1\\d{3}$", "format": "ezer"}, + "2": {"match": "^2\\d{3}$", "format": "kétezer"}, + "3": {"match": "^\\d{4}$", "format": "{x_in_x000}ezer"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{bc} {formatted_decade}"}, + "2": {"match": "^\\d000$", "format": "{bc} {formatted_thousand}"}, + "3": {"match": "^\\d{3}$", "format": "{bc} {formatted_hundreds}{formatted_decade}"}, + "4": {"match": "^[2-9]\\d00$", "format": "{bc} {formatted_thousand}-{formatted_hundreds}"}, + "5": {"match": "^1[0-9]00$", "format": "{bc} {formatted_thousand}{formatted_hundreds}"}, + "6": {"match": "^[2-9]0\\d{2}$", "format": "{bc} {formatted_thousand}-{formatted_decade}"}, + "7": {"match": "^10\\d{2}$", "format": "{bc} {formatted_thousand}{formatted_decade}"}, + "8": {"match": "^[2-9]00\\d$", "format": "{bc} {formatted_thousand}{formatted_decade}"}, + "9": {"match": "^1\\d{3}$", "format": "{bc} {formatted_thousand}{formatted_hundreds}{formatted_decade}"}, + "10": {"match": "^[2-9]\\d{3}$", "format": "{bc} {formatted_thousand}-{formatted_hundreds}{formatted_decade}"}, + "default": "{bc} {year}", + "bc": "kr.e." 
+ }, + "date_format": { + "date_full": "{formatted_year} {month} {day}, {weekday}", + "date_full_no_year": "{month} {day}, {weekday}", + "date_full_no_year_month": "{day}, {weekday}", + "today": "ma", + "tomorrow": "holnap", + "yesterday": "tegnap" + }, + "date_time_format": { + "date_time": "{formatted_date}, {formatted_time}" + }, + "weekday": { + "0": "hétfő", + "1": "kedd", + "2": "szerda", + "3": "csütörtök", + "4": "péntek", + "5": "szombat", + "6": "vasárnap" + }, + "date": { + "1": "elseje", + "2": "másodika", + "3": "harmadika", + "4": "negyedike", + "5": "ötödike", + "6": "hatodika", + "7": "hetedike", + "8": "nyolcadika", + "9": "kilencedike", + "10": "tizedike", + "11": "tizenegyedike", + "12": "tizenkettedike", + "13": "tizenharmadika", + "14": "tizennegyedike", + "15": "tizenötödike", + "16": "tizenhatodika", + "17": "tizenhetedike", + "18": "tizennyolcadika", + "19": "tizenkilencedike", + "20": "huszadika", + "21": "huszonegyedike", + "22": "huszonkettedike", + "23": "huszonharmadika", + "24": "huszonnegyedike", + "25": "huszonötödike", + "26": "huszonhatodika", + "27": "huszonhetedike", + "28": "huszonnyolcadika", + "29": "huszonkilencedike", + "30": "harmincadika", + "31": "harmincegyedike" + }, + "month": { + "1": "január", + "2": "február", + "3": "március", + "4": "április", + "5": "május", + "6": "június", + "7": "július", + "8": "augusztus", + "9": "szeptember", + "10": "október", + "11": "november", + "12": "december" + }, + "number": { + "0": "nulla", + "1": "egy", + "2": "kettő", + "3": "három", + "4": "négy", + "5": "öt", + "6": "hat", + "7": "hét", + "8": "nyolc", + "9": "kilenc", + "10": "tíz", + "11": "tizenegy", + "12": "tizenkettő", + "13": "tizenhárom", + "14": "tizennégy", + "15": "tizenöt", + "16": "tizenhat", + "17": "tizenhét", + "18": "tizennyolc", + "19": "tizenkilenc", + "20": "huszon", + "30": "harminc", + "40": "negyven", + "50": "ötven", + "60": "hatvan", + "70": "hetven", + "80": "nyolcvan", + "90": "kilencven" + } +} diff 
--git a/ovos_date_parser/res/hu-hu/date_time_test.json b/ovos_date_parser/res/hu-hu/date_time_test.json new file mode 100644 index 0000000..cfe6601 --- /dev/null +++ b/ovos_date_parser/res/hu-hu/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "kr.e. egy" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "kr.e. tíz" }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "kr.e. kilencvenkettő" }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nyolcszázhárom" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nyolcszáztizenegy" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "négyszázötvennégy" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ezeröt" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ezertizenkettő" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ezernegyvenhat" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ezernyolcszázhét" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ezerhétszáztizenhét" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ezerkilencszáznyolcvannyolc"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "kétezer-kilenc"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "kétezer-tizennyolc"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "kétezer-huszonegy"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "kétezer-harminc"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "kétezer-egyszáz" }, + 
"18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ezer" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "kétezer" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "kr.e. háromezer-egyszázhúsz" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "kr.e. háromezer-kétszáznegyvenegy" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ötezer-kétszáz" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ezeregyszáz" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "kétezer-egyszáz" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "kétezer-tizenhét január harmincegyedike, kedd"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "kétezer-tizennyolc február negyedike, vasárnap"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "február negyedike, vasárnap"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "negyedike, vasárnap"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "holnap"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "ma"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "tegnap"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "február negyedike, vasárnap"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "kétezer-tizennyolc február negyedike, vasárnap"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", 
"assertEqual": "kétezer-tizenhét január harmincegyedike, kedd, délután egy óra huszonkettő"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "kétezer-tizenhét január harmincegyedike, kedd, tizenhárom óra huszonkettő"} + } +} diff --git a/ovos_date_parser/res/it-it/date_time.json b/ovos_date_parser/res/it-it/date_time.json new file mode 100644 index 0000000..4580b11 --- /dev/null +++ b/ovos_date_parser/res/it-it/date_time.json @@ -0,0 +1,153 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9][1|8]", "format": "{xx}"}, + "5": {"match": "^[2-9]\\d$", "format": "{x0}{x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "cento"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00}cento"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^1\\d{3}$", "format": "mille"}, + "2": {"match": "^\\d{4}$", "format": "{x_in_x000}mila"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d000$", "format": "{formatted_thousand} {bc}"}, + "4": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "5": {"match": "^1[0-9]00$", "format": "{formatted_thousand} {formatted_hundreds} {bc}"}, + "6": {"match": "^10\\d{2}$", "format": "{formatted_thousand} e {formatted_decade} {bc}"}, + "7": {"match": "^[2-9][0-9]00$", "format": "{formatted_thousand} {formatted_hundreds} {bc}"}, + "8": {"match": "^20\\d{2}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "9": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "10": {"match": "^[2-9]000$", "format": 
"{formatted_thousand} {bc}"}, + "11": {"match": "^20\\d{2}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "12": {"match": "^([2-9]\\d{3})|(\\d0\\d{2})$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "13": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_hundreds} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "a.C." + }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "oggi", + "tomorrow": "domani", + "yesterday": "ieri" + }, + "date_time_format": { + "date_time": "{formatted_date} alle {formatted_time}" + }, + "weekday": { + "0": "lunedì", + "1": "martedì", + "2": "mercoledì", + "3": "giovedì", + "4": "venerdì", + "5": "sabato", + "6": "domenica" + }, + "date": { + "1": "primo", + "2": "due", + "3": "tre", + "4": "quattro", + "5": "cinque", + "6": "sei", + "7": "sette", + "8": "otto", + "9": "nove", + "10": "dieci", + "11": "undici", + "12": "dodici", + "13": "tredici", + "14": "quattordici", + "15": "quindici", + "16": "sedici", + "17": "diciassette", + "18": "diciotto", + "19": "diciannove", + "20": "venti", + "21": "ventuno", + "22": "ventidue", + "23": "ventitre", + "24": "ventiquattro", + "25": "venticinque", + "26": "ventisei", + "27": "ventisette", + "28": "ventotto", + "29": "ventinove", + "30": "trenta", + "31": "trentuno" + }, + "month": { + "1": "gennaio", + "2": "febbraio", + "3": "marzo", + "4": "aprile", + "5": "maggio", + "6": "giugno", + "7": "luglio", + "8": "agosto", + "9": "settembre", + "10": "ottobre", + "11": "novembre", + "12": "dicembre" + }, + "number": { + "0": "zero", + "1": "uno", + "2": "due", + "3": "tre", + "4": "quattro", + "5": "cinque", + "6": "sei", + "7": "sette", + "8": "otto", + "9": "nove", + "10": "dieci", + "11": "undici", + "12": "dodici", + "13": "tredici", + "14": "quattordici", + "15": 
"quindici", + "16": "sedici", + "17": "diciassette", + "18": "diciotto", + "19": "diciannove", + "20": "venti", + "21": "ventuno", + "28": "ventotto", + "30": "trenta", + "31": "trentuno", + "38": "trentotto", + "40": "quaranta", + "41": "quarantuno", + "48": "quarantotto", + "50": "cinquanta", + "51": "cinquantuno", + "58": "cinquantotto", + "60": "sessanta", + "61": "sessantuno", + "68": "sessantotto", + "70": "settanta", + "71": "settantuno", + "78": "settantotto", + "80": "ottanta", + "81": "ottantuno", + "88": "ottantotto", + "90": "novanta", + "91": "novantuno", + "98": "novantotto", + "100": "cento", + "1000": "mille", + "2000": "duemila" + } +} diff --git a/ovos_date_parser/res/it-it/date_time_test.json b/ovos_date_parser/res/it-it/date_time_test.json new file mode 100644 index 0000000..f646c5c --- /dev/null +++ b/ovos_date_parser/res/it-it/date_time_test.json @@ -0,0 +1,42 @@ +{ "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "uno a.C." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "dieci a.C." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "novantadue a.C." 
}, + "4": {"datetime_param": "100, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "cento" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ottocento undici" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "quattrocento cinquantaquattro" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille e cinque" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille e dodici" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille e quarantasei" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille ottocento sette" }, + "11": {"datetime_param": "1700, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille settecento" }, + "12": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille settecento diciassette" }, + "13": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille novecento ottantotto"}, + "14": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duemila nove"}, + "15": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duemila diciotto"}, + "16": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duemila ventuno"}, + "17": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duemila trenta"}, + "18": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "duemila cento" }, + "19": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "mille" }, + "20": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duemila" }, + "21": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tremila cento venti a.C." }, + "22": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tremila duecento quarantuno a.C." 
}, + "23": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "cinquemila duecento" }, + "24": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "mille cento" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "martedì, trentuno gennaio, duemila diciassette"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "domenica, quattro febbraio, duemila diciotto"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "domenica, quattro febbraio"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "domenica, quattro"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "domani"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "oggi"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "ieri"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "domenica, quattro febbraio"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "domenica, quattro febbraio, duemila diciotto"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "martedì, trentuno gennaio, duemila diciassette alle una e ventidue del pomeriggio"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "martedì, trentuno gennaio, duemila diciassette alle tredici e ventidue"} + } +} diff --git a/ovos_date_parser/res/nl-nl/date_time.json b/ovos_date_parser/res/nl-nl/date_time.json new file mode 100644 index 0000000..b1f94fa --- /dev/null +++ b/ovos_date_parser/res/nl-nl/date_time.json @@ -0,0 +1,136 
@@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x} en {x0}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "honderd"}, + "2": {"match": "^\\d{3}$", "format": "{x_in_x00} honderd"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d\\d$", "format": "duizend"}, + "2": {"match": "^\\d0\\d{2}$", "format": "{x_in_x000} duizend"}, + "3": {"match": "^1\\d00$", "format": "{xx_in_xx00} honderd"}, + "4": {"match": "^\\d{2}00$", "format": "{x_in_x00} en {x0_in_x000} honderd"}, + "5": {"match": "^\\d0\\d\\d$", "format": "{x_in_x000} duizend"}, + "6": {"match": "^1\\d{3}$", "format": "{xx_in_xx00}"}, + "7": {"match": "^\\d{4}$", "format": "{x_in_0x00} en {x0_in_x000}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d{1}?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d{2}?$", "format": "{formatted_decade} {bc}"}, + "3": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "4": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "5": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "6": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "7": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "8": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "v.c." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day} {month}", + "today": "vandaag", + "tomorrow": "morgen", + "yesterday": "gisteren" + }, + "time_format": { + "time_full": "{minutes} over {hour}" + }, + "date_time_format": { + "date_time": "{formatted_date} om {formatted_time}" + }, + "weekday": { + "0": "maandag", + "1": "dinsdag", + "2": "woensdag", + "3": "donderdag", + "4": "vrijdag", + "5": "zaterdag", + "6": "zondag" + }, + "date": { + "1": "een", + "2": "twee", + "3": "drie", + "4": "vier", + "5": "vijf", + "6": "zes", + "7": "zeven", + "8": "acht", + "9": "negen", + "10": "tien", + "11": "elf", + "12": "twaalf", + "13": "dertien", + "14": "veertien", + "15": "vijftien", + "16": "zestien", + "17": "zeventien", + "18": "achttien", + "19": "negentien", + "20": "twintig", + "21": "eenentwintig", + "22": "tweeentwintig", + "23": "drieentwintig", + "24": "vierentwintig", + "25": "vijfentwintig", + "26": "zesentwintig", + "27": "zevenentwintig", + "28": "achtentwintig", + "29": "negenentwintig", + "30": "dertig", + "31": "eenendertig" + }, + "month": { + "1": "januari", + "2": "februari", + "3": "maart", + "4": "april", + "5": "mei", + "6": "juni", + "7": "juli", + "8": "augustus", + "9": "september", + "10": "oktober", + "11": "november", + "12": "december" + }, + "number": { + "0": "nul", + "1": "een", + "2": "twee", + "3": "drie", + "4": "vier", + "5": "vijf", + "6": "zes", + "7": "zeven", + "8": "acht", + "9": "negen", + "10": "tien", + "11": "elf", + "12": "twaalf", + "13": "dertien", + "14": "veertien", + "15": "vijftien", + "16": "zestien", + "17": "zeventien", + "18": "achttien", + "19": "negentien", + "20": "twintig", + "30": "dertig", + "40": "veertig", + "50": "vijftig", + "60": "zestig", + "70": "zeventig", + "80": "tachtig", + "90": "negentig" + } +} diff --git a/ovos_date_parser/res/nl-nl/date_time_test.json
b/ovos_date_parser/res/nl-nl/date_time_test.json new file mode 100644 index 0000000..2486c28 --- /dev/null +++ b/ovos_date_parser/res/nl-nl/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "een v.c." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tien v.c." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "twee en negentig v.c." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht honderd drie" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "acht honderd elf" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "vier honderd vier en vijftig" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "duizend vijf" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "duizend twaalf" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "duizend zes en veertig" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "achttien zeven" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "zeventien zeventien" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "negentien acht en tachtig"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twee duizend negen"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twee duizend achttien"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twee duizend een en twintig"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twee duizend dertig"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "een en twintig honderd" }, + "18": 
{"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "duizend" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "twee duizend" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "een en dertig twintig v.c." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "twee en dertig een en veertig v.c." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "twee en vijftig honderd" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elf honderd" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "een en twintig honderd" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "dinsdag, eenendertig januari, twee duizend zeventien"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "zondag, vier februari, twee duizend achttien"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "zondag, vier februari"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "zondag, vier februari"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "morgen"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "vandaag"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "gisteren"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "zondag, vier februari"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "zondag, vier februari, twee duizend achttien"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", 
"use_ampm": "True", "assertEqual": "dinsdag, eenendertig januari, twee duizend zeventien om tweeentwintig over één 's middags"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "dinsdag, eenendertig januari, twee duizend zeventien om dertien uur tweeentwintig"} + } +} diff --git a/ovos_date_parser/res/pl-pl/date_time.json b/ovos_date_parser/res/pl-pl/date_time.json new file mode 100644 index 0000000..2930916 --- /dev/null +++ b/ovos_date_parser/res/pl-pl/date_time.json @@ -0,0 +1,129 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{3}$", "format": "{x_in_x00} hundred"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^\\d00\\d$", "format": "{x_in_x000} tysiąc"}, + "2": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundred"}, + "3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} hundred"}, + "4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"}, + "5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d00\\d$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} oh {formatted_decade} {bc}"}, + "7": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "p.n.e." 
+ }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "dziś", + "tomorrow": "jutro", + "yesterday": "wczoraj" + }, + "date_time_format": { + "date_time": "{formatted_date} at {formatted_time}" + }, + "weekday": { + "0": "poniedziałek", + "1": "wtorek", + "2": "środa", + "3": "czwartek", + "4": "piątek", + "5": "sobota", + "6": "niedziela" + }, + "date": { + "1": "pierwszy", + "2": "drugi", + "3": "trzeci", + "4": "czwarty", + "5": "piąty", + "6": "szósty", + "7": "siódmy", + "8": "ósmy", + "9": "dziewiąty", + "10": "dziesiąty", + "11": "jedenasty", + "12": "dwunasty", + "13": "trzynasty", + "14": "czternasty", + "15": "piętnasty", + "16": "szesnasty", + "17": "siedemnasty", + "18": "osiemnasty", + "19": "dziewiętnasty", + "20": "dwudziesty", + "21": "dwudziesty pierwszy", + "22": "dwudziesty drugi", + "23": "dwudziesty trzeci", + "24": "dwudziesty czwarty", + "25": "dwudziesty piąty", + "26": "dwudziesty szósty", + "27": "dwudziesty siódmy", + "28": "dwudziesty ósmy", + "29": "dwudziesty dziewiąty", + "30": "trzydziesty", + "31": "trzydziesty pierwszy" + }, + "month": { + "1": "styczeń", + "2": "luty", + "3": "marzec", + "4": "kwiecień", + "5": "maj", + "6": "czerwiec", + "7": "lipiec", + "8": "sierpień", + "9": "wrzesień", + "10": "październik", + "11": "listopad", + "12": "grudzień" + }, + "number": { + "0": "zero", + "1": "jeden", + "2": "dwa", + "3": "trzy", + "4": "cztery", + "5": "pięć", + "6": "sześć", + "7": "siedem", + "8": "osiem", + "9": "dziewięć", + "10": "dziesięć", + "11": "jedenaście", + "12": "dwanaście", + "13": "trzynaście", + "14": "czternaście", + "15": "piętnaście", + "16": "szesnaście", + "17": "siedemnaście", + "18": "osiemnaście", + "19": "dziewiętnaście", + "20": "dwadzieścia", + "30": "trzydzieści", + "40": "czterdzieści", + "50": "pięćdziesiąt", + "60": "sześćdziesiąt", + "70": "siedemdziesiąt", +
"80": "osiemdziesiąt", + "90": "dziewięćdziesiąt" + } +} diff --git a/ovos_date_parser/res/ru-ru/date_time.json b/ovos_date_parser/res/ru-ru/date_time.json new file mode 100644 index 0000000..6c83547 --- /dev/null +++ b/ovos_date_parser/res/ru-ru/date_time.json @@ -0,0 +1,149 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "сто"}, + "2": {"match": "^2\\d{2}$", "format": "двести"}, + "3": {"match": "^[34]\\d{2}$", "format": "{x_in_x00}ста"}, + "4": {"match": "^\\d{3}$", "format": "{x_in_x00}сот"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d{2}$", "format": "тысяча"}, + "2": {"match": "^11\\d{2}$", "format": "тысяча сто"}, + "3": {"match": "^12\\d{2}$", "format": "тысяча двести"}, + "4": {"match": "^1[34]\\d{2}$", "format": "тысяча {x_in_x00}ста"}, + "5": {"match": "^1\\d{3}$", "format": "тысяча {x_in_x00}сот"}, + + "6": {"match": "^20\\d{2}$", "format": "две тысячи"}, + "7": {"match": "^21\\d{2}$", "format": "две тысячи сто"}, + "8": {"match": "^22\\d{2}$", "format": "две тысячи двести"}, + "9": {"match": "^2[34]\\d{2}$", "format": "две тысячи {x_in_x00}ста"}, + "10": {"match": "^2\\d{3}$", "format": "две тысячи {x_in_x00}сот"}, + + "11": {"match": "^[34]0\\d{2}$", "format": "{x_in_x000} тысячи"}, + "12": {"match": "^[34]1\\d{2}$", "format": "{x_in_x000} тысячи сто"}, + "13": {"match": "^[34]2\\d{2}$", "format": "{x_in_x000} тысячи двести"}, + "14": {"match": "^[34][34]\\d{2}$", "format": "{x_in_x000} тысячи {x_in_x00}ста"}, + "15": {"match": "^[34]\\d{3}$", "format": "{x_in_x000} тысячи {x_in_x00}сот"}, + + "16": {"match": "^[5-9]0\\d{2}$", "format": "{x_in_x000} тысяч"}, + "17": {"match": "^[5-9]1\\d{2}$", "format": "{x_in_x000} тысяч сто"}, + "18": {"match": 
"^[5-9]2\\d{2}$", "format": "{x_in_x000} тысяч двести"}, + "19": {"match": "^[5-9][34]\\d{2}$", "format": "{x_in_x000} тысяч {x_in_x00}ста"}, + "20": {"match": "^[5-9]\\d{3}$", "format": "{x_in_x000} тысяч {x_in_x00}сот"}, + + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "до нашей эры" + }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "сегодня", + "tomorrow": "завтра", + "yesterday": "вчера" + }, + "date_time_format": { + "date_time": "{formatted_date} в {formatted_time}" + }, + "weekday": { + "0": "в понедельник", + "1": "во вторник", + "2": "в среду", + "3": "в четверг", + "4": "в пятницу", + "5": "в субботу", + "6": "в воскресенье" + }, + "date": { + "1": "первого", + "2": "второго", + "3": "третьего", + "4": "четвёртого", + "5": "пятого", + "6": "шестого", + "7": "седьмого", + "8": "восьмого", + "9": "девятого", + "10": "десятого", + "11": "одиннадцатого", + "12": "двенадцатого", + "13": "тринадцатого", + "14": "четырнадцатого", + "15": "пятнадцатого", + "16": "шестнадцатого", + "17": "семнадцатого", + "18": "восемнадцатого", + "19": "девятнадцатого", + "20": "двадцатого", + "21": "двадцать первого", + "22": "двадцать второго", + "23": "двадцать третьего", + "24": "двадцать четвёртого", + "25": "двадцать пятого", + "26": "двадцать шестого", + "27": "двадцать седьмого", + "28": "двадцать восьмого", + "29": "двадцать девятого", + "30": "тридцатого", + "31": "тридцать первого" + }, + "month": { + "1": 
"января", + "2": "февраля", + "3": "марта", + "4": "апреля", + "5": "мая", + "6": "июня", + "7": "июля", + "8": "августа", + "9": "сентября", + "10": "октября", + "11": "ноября", + "12": "декабря" + }, + "number": { + "0": "ноль", + "1": "один", + "2": "два", + "3": "три", + "4": "четыре", + "5": "пять", + "6": "шесть", + "7": "семь", + "8": "восемь", + "9": "девять", + "10": "десять", + "11": "одиннадцать", + "12": "двенадцать", + "13": "тринадцать", + "14": "четырнадцать", + "15": "пятнадцать", + "16": "шестнадцать", + "17": "семнадцать", + "18": "восемнадцать", + "19": "девятнадцать", + "20": "двадцать", + "30": "тридцать", + "40": "сорок", + "50": "пятьдесят", + "60": "шестьдесят", + "70": "семьдесят", + "80": "восемьдесят", + "90": "девяносто" + } +} diff --git a/ovos_date_parser/res/ru-ru/date_time_test.json b/ovos_date_parser/res/ru-ru/date_time_test.json new file mode 100644 index 0000000..ac82786 --- /dev/null +++ b/ovos_date_parser/res/ru-ru/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "один до нашей эры" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "десять до нашей эры" }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "девяносто два до нашей эры" }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "восемьсот три" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "восемьсот одиннадцать" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "четыреста пятьдесят четыре" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "тысяча пять" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "тысяча двенадцать" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "тысяча сорок шесть" }, + "10": {"datetime_param": 
"1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "тысяча восемьсот семь" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "тысяча семьсот семнадцать" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "тысяча девятьсот восемьдесят восемь"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "две тысячи девять"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "две тысячи восемнадцать"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "две тысячи двадцать один"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "две тысячи тридцать"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "две тысячи сто" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "тысяча" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "две тысячи" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "три тысячи сто двадцать до нашей эры" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "три тысячи двести сорок один до нашей эры" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "пять тысяч двести" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "тысяча сто" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "две тысячи сто" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "во вторник, тридцать первого января, две тысячи семнадцать"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "в воскресенье, четвёртого февраля, две тысячи восемнадцать"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 
0, 2, 3", "assertEqual": "в воскресенье, четвёртого февраля"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "в воскресенье, четвёртого"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "завтра"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "сегодня"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "вчера"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "в воскресенье, четвёртого февраля"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "в воскресенье, четвёртого февраля, две тысячи восемнадцать"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "во вторник, тридцать первого января, две тысячи семнадцать в час двадцать два дня"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "во вторник, тридцать первого января, две тысячи семнадцать в тринадцать двадцать два"} + } +} diff --git a/ovos_date_parser/res/sl-si/date_time.json b/ovos_date_parser/res/sl-si/date_time.json new file mode 100644 index 0000000..9da33e4 --- /dev/null +++ b/ovos_date_parser/res/sl-si/date_time.json @@ -0,0 +1,123 @@ +{ + "decade_format": { + "1": {"match": "^0$", "format": ""}, + "2": {"match": "^\\d$", "format": "{x}"}, + "3": {"match": "^1\\d$", "format": "{xx}"}, + "4": {"match": "^\\d0$", "format": "{x0}"}, + "5": {"match": "^[2-9]\\d$", "format": "{x}in{x0}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{1,2}$", "format": "{formatted_decade}"}, + "2": {"match": "^1\\d{2}$", "format": "sto {formatted_decade}"}, + "3": {"match": "^2\\d{2}$", "format": "dvesto {formatted_decade}"}, + "4": {"match": 
"^\\d{3}$", "format": "{x_in_x00}sto {formatted_decade}"}, + "default": "{formatted_decade}" + }, + "thousand_format": { + "1": {"match": "^\\d{1,3}$", "format": "{formatted_hundreds}"}, + "2": {"match": "^1\\d{3}$", "format": "tisoč {formatted_hundreds}"}, + "default": "{x_in_x000} tisoč {formatted_hundreds}" + }, + "year_format": { + "default": "{formatted_thousand} {bc}", + "bc": "pr. n. št." + }, + "date_format": { + "date_full": "{weekday}, {day} {month} {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "danes", + "tomorrow": "jutri", + "yesterday": "včeraj" + }, + "date_time_format": { + "date_time": "{formatted_date}, ob {formatted_time}" + }, + "weekday": { + "0": "ponedeljek", + "1": "torek", + "2": "sreda", + "3": "četrtek", + "4": "petek", + "5": "sobota", + "6": "nedelja" + }, + "date": { + "1": "prvi", + "2": "drugi", + "3": "tretji", + "4": "četrti", + "5": "peti", + "6": "šesti", + "7": "sedmi", + "8": "osmi", + "9": "deveti", + "10": "deseti", + "11": "enajsti", + "12": "dvanajsti", + "13": "trinajsti", + "14": "štirinajsti", + "15": "petjanjsti", + "16": "šestnajsti", + "17": "sedemnajsti", + "18": "osemnajsti", + "19": "devetnajsti", + "20": "dvajseti", + "21": "enaindvajseti", + "22": "dvaindvajseti", + "23": "triindvajseti", + "24": "štiriindvajseti", + "25": "petindvajseti", + "26": "šestindvajseti", + "27": "sedemindvajseti", + "28": "osemindvajseti", + "29": "devetindvajseti", + "30": "trideseti", + "31": "enaintrideseti" + }, + "month": { + "1": "januar", + "2": "februar", + "3": "marec", + "4": "april", + "5": "maj", + "6": "junij", + "7": "julij", + "8": "avgust", + "9": "september", + "10": "oktober", + "11": "november", + "12": "december" + }, + "number": { + "0": "nič", + "1": "ena", + "2": "dva", + "3": "tri", + "4": "štiri", + "5": "pet", + "6": "šest", + "7": "sedem", + "8": "osem", + "9": "devet", + "10": "deset", + "11": "enajst", + "12": "dvanajst", + 
"13": "trinajst", + "14": "štirinajst", + "15": "petnajst", + "16": "šestnajst", + "17": "sedemnajst", + "18": "osemnajst", + "19": "devetnajst", + "20": "dvajset", + "30": "trideset", + "40": "štirideset", + "50": "petdeset", + "60": "šestdeset", + "70": "sedemdeset", + "80": "osemdeset", + "90": "devetdeset" + } +} diff --git a/ovos_date_parser/res/sl-si/date_time_test.json b/ovos_date_parser/res/sl-si/date_time_test.json new file mode 100644 index 0000000..7bebae0 --- /dev/null +++ b/ovos_date_parser/res/sl-si/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ena pr. n. št." }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "deset pr. n. št." }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "dvaindevetdeset pr. n. št." }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osemsto tri" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "osemsto enajst" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "štiristo štiriinpetdeset" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tisoč pet" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tisoč dvanajst" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tisoč šestinštirideset" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tisoč osemsto sedem" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tisoč sedemsto sedemnajst" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tisoč devetsto oseminosemdeset"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisoč devet"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", 
"bc": "None", "assertEqual": "dva tisoč osemnajst"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisoč enaindvajset"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisoč trideset"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dva tisoč sto" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tisoč" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "dva tisoč" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tri tisoč sto dvajset pr. n. št." }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tri tisoč dvesto enainštirideset pr. n. št." }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "pet tisoč dvesto" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tisoč sto" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "dva tisoč sto" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "torek, enaintrideseti januar dva tisoč sedemnajst"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "nedelja, četrti februar dva tisoč osemnajst"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "nedelja, četrti februar"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "nedelja, četrti"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "jutri"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "danes"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "včeraj"}, + "8": {"datetime_param": "2018, 
2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "nedelja, četrti februar"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "nedelja, četrti februar dva tisoč osemnajst"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "torek, enaintrideseti januar dva tisoč sedemnajst, ob dvaindvajset čez ena p.m."}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "torek, enaintrideseti januar dva tisoč sedemnajst, ob trinajst dvaindvajset"} + } +} diff --git a/ovos_date_parser/res/sv-se/date_time.json b/ovos_date_parser/res/sv-se/date_time.json new file mode 100644 index 0000000..7c43912 --- /dev/null +++ b/ovos_date_parser/res/sv-se/date_time.json @@ -0,0 +1,129 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^\\d{3}$", "format": "{x_in_x00} hundra"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^\\d00\\d$", "format": "{x_in_x000} tusen"}, + "2": {"match": "^1\\d00$", "format": "{xx_in_xx00} hundra"}, + "3": {"match": "^\\d{2}00$", "format": "{x0_in_x000} {x_in_x00} hundra"}, + "4": {"match": "^(1\\d{3})|(\\d0\\d{2})$", "format": "{xx_in_xx00}"}, + "5": {"match": "^\\d{4}$", "format": "{x0_in_x000} {x_in_x00}"}, + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d00\\d$", 
"format": "{formatted_thousand} {formatted_decade} {bc}"}, + "6": {"match": "^\\d{2}0\\d$", "format": "{formatted_thousand} noll {formatted_decade} {bc}"}, + "7": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "före kristus" + }, + "date_format": { + "date_full": "{weekday}, den {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, den {day} {month}", + "date_full_no_year_month": "{weekday}, den {day}", + "today": "idag", + "tomorrow": "imorgon", + "yesterday": "igår" + }, + "date_time_format": { + "date_time": "{formatted_date} klockan {formatted_time}" + }, + "weekday": { + "0": "måndag", + "1": "tisdag", + "2": "onsdag", + "3": "torsdag", + "4": "fredag", + "5": "lördag", + "6": "söndag" + }, + "date": { + "1": "första", + "2": "andra", + "3": "tredje", + "4": "fjärde", + "5": "femte", + "6": "sjätte", + "7": "sjunde", + "8": "åttonde", + "9": "nionde", + "10": "tionde", + "11": "elfte", + "12": "tolfte", + "13": "trettonde", + "14": "fjortonde", + "15": "femtonde", + "16": "sextonde", + "17": "sjuttonde", + "18": "artonde", + "19": "nittonde", + "20": "tjugonde", + "21": "tjugoförsta", + "22": "tjugoandra", + "23": "tjugotredje", + "24": "tjugofjärde", + "25": "tjugofemte", + "26": "tjugosjätte", + "27": "tjugosjunde", + "28": "tjugoåttonde", + "29": "tjugonionde", + "30": "trettionde", + "31": "trettiförsta" + }, + "month": { + "1": "januari", + "2": "februari", + "3": "mars", + "4": "april", + "5": "maj", + "6": "juni", + "7": "juli", + "8": "augusti", + "9": "september", + "10": "oktober", + "11": "november", + "12": "december" + }, + "number": { + "0": "noll", + "1": "ett", + "2": "två", + "3": "tre", + "4": "fyra", + "5": "fem", + "6": "sex", + "7": "sju", + "8": "åtta", + "9": "nio", + "10": "tio", + "11": "elva", + "12": "tolv", + "13": "tretton", + "14": "fjorton", + "15": "femton", + "16": "sexton", + "17": "sjutton", + "18": "arton", + "19": "nitton", + "20": "tjugo", 
+ "30": "trettio", + "40": "förtio", + "50": "femtio", + "60": "sextio", + "70": "sjuttio", + "80": "åttio", + "90": "nittio" + } +} diff --git a/ovos_date_parser/res/sv-se/date_time_test.json b/ovos_date_parser/res/sv-se/date_time_test.json new file mode 100644 index 0000000..0fda399 --- /dev/null +++ b/ovos_date_parser/res/sv-se/date_time_test.json @@ -0,0 +1,43 @@ +{ + "test_nice_year": { + "1": {"datetime_param": "1, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "ett före kristus" }, + "2": {"datetime_param": "10, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "tio före kristus" }, + "3": {"datetime_param": "92, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "nittio två före kristus" }, + "4": {"datetime_param": "803, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "åtta hundra tre" }, + "5": {"datetime_param": "811, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "åtta hundra elva" }, + "6": {"datetime_param": "454, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "fyra hundra femtio fyra" }, + "7": {"datetime_param": "1005, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "ett tusen fem" }, + "8": {"datetime_param": "1012, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tio tolv" }, + "9": {"datetime_param": "1046, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tio förtio sex" }, + "10": {"datetime_param": "1807, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "arton noll sju" }, + "11": {"datetime_param": "1717, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "sjutton sjutton" }, + "12": {"datetime_param": "1988, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "nitton åttio åtta"}, + "13": {"datetime_param": "2009, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "två tusen nio"}, + "14": {"datetime_param": "2018, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tjugo arton"}, + "15": {"datetime_param": "2021, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "tjugo tjugo ett"}, + "16": {"datetime_param": "2030, 1, 31, 13, 22, 3", "bc": "None", 
"assertEqual": "tjugo trettio"}, + "17": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tjugo ett hundra" }, + "18": {"datetime_param": "1000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "ett tusen" }, + "19": {"datetime_param": "2000, 1, 31, 13, 22, 3", "bc": "None", "assertEqual": "två tusen" }, + "20": {"datetime_param": "3120, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trettio ett tjugo före kristus" }, + "21": {"datetime_param": "3241, 1, 31, 13, 22, 3", "bc": "True", "assertEqual": "trettio två förtio ett före kristus" }, + "22": {"datetime_param": "5200, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "femtio två hundra" }, + "23": {"datetime_param": "1100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "elva hundra" }, + "24": {"datetime_param": "2100, 1, 31, 13, 22, 3", "bc": "False", "assertEqual": "tjugo ett hundra" } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "tisdag, den trettiförsta januari, tjugo sjutton"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "söndag, den fjärde februari, tjugo arton"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "söndag, den fjärde februari"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "söndag, den fjärde"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "imorgon"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "idag"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "igår"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 6, 0, 2, 3", "assertEqual": "söndag, den fjärde februari"}, + "9": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "söndag, den fjärde februari, tjugo arton"} 
+ }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "tisdag, den trettiförsta januari, tjugo sjutton klockan tjugotvå minuter över ett på eftermiddagen"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "tisdag, den trettiförsta januari, tjugo sjutton klockan tretton tjugotvå"} + } +} diff --git a/ovos_date_parser/res/uk-ua/date_time.json b/ovos_date_parser/res/uk-ua/date_time.json new file mode 100644 index 0000000..87ff25c --- /dev/null +++ b/ovos_date_parser/res/uk-ua/date_time.json @@ -0,0 +1,150 @@ +{ + "decade_format": { + "1": {"match": "^\\d$", "format": "{x}"}, + "2": {"match": "^1\\d$", "format": "{xx}"}, + "3": {"match": "^\\d0$", "format": "{x0}"}, + "4": {"match": "^[2-9]\\d$", "format": "{x0} {x}"}, + "default": "{number}" + }, + "hundreds_format": { + "1": {"match": "^1\\d{2}$", "format": "сто"}, + "2": {"match": "^2\\d{2}$", "format": "двісті"}, + "3": {"match": "^[34]\\d{2}$", "format": "{x_in_x00}ста"}, + "4": {"match": "^\\d{3}$", "format": "{x_in_x00}сот"}, + "default": "{number}" + }, + "thousand_format": { + "1": {"match": "^10\\d{2}$", "format": "тисяча"}, + "2": {"match": "^11\\d{2}$", "format": "тисяча сто"}, + "3": {"match": "^12\\d{2}$", "format": "тисяча двісті"}, + "4": {"match": "^1[34]\\d{2}$", "format": "тисяча {x_in_x00}ста"}, + "5": {"match": "^1\\d{3}$", "format": "тисяча {x_in_x00}сот"}, + + "6": {"match": "^20\\d{2}$", "format": "дві тисячі"}, + "7": {"match": "^21\\d{2}$", "format": "дві тисячі сто"}, + "8": {"match": "^22\\d{2}$", "format": "дві тисячі двісті"}, + "9": {"match": "^2[34]\\d{2}$", "format": "дві тисячі {x_in_x00}ста"}, + "10": {"match": "^2\\d{3}$", "format": "дві тисячі {x_in_x00}сот"}, + + "11": {"match": "^[34]0\\d{2}$", "format": "{x_in_x000} тисячі"}, + "12": {"match": "^[34]1\\d{2}$", "format": "{x_in_x000} тисячі сто"}, + 
"13": {"match": "^[34]2\\d{2}$", "format": "{x_in_x000} тисячі двісті"}, + "14": {"match": "^[34][34]\\d{2}$", "format": "{x_in_x000} тисячі {x_in_x00}ста"}, + "15": {"match": "^[34]\\d{3}$", "format": "{x_in_x000} тисячі {x_in_x00}сот"}, + + "16": {"match": "^[5-9]0\\d{2}$", "format": "{x_in_x000} тисяч"}, + "17": {"match": "^[5-9]1\\d{2}$", "format": "{x_in_x000} тисяч сто"}, + "18": {"match": "^[5-9]2\\d{2}$", "format": "{x_in_x000} тисяч двісті"}, + "19": {"match": "^[5-9][34]\\d{2}$", "format": "{x_in_x000} тисяч {x_in_x00}ста"}, + "20": {"match": "^[5-9]\\d{3}$", "format": "{x_in_x000} тисяч {x_in_x00}сот"}, + + "default": "{number}" + }, + "year_format": { + "1": {"match": "^\\d\\d?$", "format": "{formatted_decade} {bc}"}, + "2": {"match": "^\\d00$", "format": "{formatted_hundreds} {bc}"}, + "3": {"match": "^\\d{3}$", "format": "{formatted_hundreds} {formatted_decade} {bc}"}, + "4": {"match": "^\\d{2}00$", "format": "{formatted_thousand} {bc}"}, + "5": {"match": "^\\d{4}$", "format": "{formatted_thousand} {formatted_decade} {bc}"}, + "default": "{year} {bc}", + "bc": "до нашої ери" + }, + "date_format": { + "date_full": "{weekday}, {day} {month}, {formatted_year}", + "date_full_no_year": "{weekday}, {day} {month}", + "date_full_no_year_month": "{weekday}, {day}", + "today": "сьогодні", + "tomorrow": "завтра", + "yesterday": "вчора" + }, + "date_time_format": { + "date_time": "{formatted_date}, {formatted_time}" + }, + "weekday": { + "0": "понеділок", + "1": "вівторок", + "2": "середа", + "3": "четвер", + "4": "п'ятниця", + "5": "субота", + "6": "неділя" + }, + "date": { + "1": "перше", + "2": "друге", + "3": "третє", + "4": "четверте", + "5": "п'яте", + "6": "шосте", + "7": "сьоме", + "8": "восьме", + "9": "дев'яте", + "10": "десяте", + "11": "одинадцяте", + "12": "дванадцяте", + "13": "тринадцяте", + "14": "чотирнадцяте", + "15": "п'ятнадцяте", + "16": "шістнадцяте", + "17": "сімнадцяте", + "18": "вісімнадцяте", + "19": "дев'ятнадцяте", + "20": "двадцяте", 
+ "21": "двадцять перше", + "22": "двадцять друге", + "23": "двадцять третє", + "24": "двадцять четверте", + "25": "двадцять п'яте", + "26": "двадцять шосте", + "27": "двадцять сьоме", + "28": "двадцять восьме", + "29": "двадцять дев'яте", + "30": "тридцяте", + "31": "тридцять перше" + }, + "month": { + "1": "січня", + "2": "лютого", + "3": "березня", + "4": "квітня", + "5": "травня", + "6": "червня", + "7": "липня", + "8": "серпня", + "9": "вересня", + "10": "жовтня", + "11": "листопада", + "12": "грудня" + }, + + "number": { + "0": "нуль", + "1": "один", + "2": "два", + "3": "три", + "4": "чотири", + "5": "п'ять", + "6": "шість", + "7": "сім", + "8": "вісім", + "9": "дев'ять", + "10": "десять", + "11": "одинадцять", + "12": "дванадцять", + "13": "тринадцять", + "14": "чотирнадцять", + "15": "п'ятнадцять", + "16": "шестнадцать", + "17": "сімнадцять", + "18": "вісімнадцять", + "19": "дев'ятнадцять", + "20": "двадцять", + "30": "тридцять", + "40": "сорок", + "50": "п'ятдесят", + "60": "шістдесят", + "70": "сімдесят", + "80": "вісімдесят", + "90": "дев'яносто" + } +} diff --git a/ovos_date_parser/res/uk-ua/date_time_test.json b/ovos_date_parser/res/uk-ua/date_time_test.json new file mode 100644 index 0000000..c76700c --- /dev/null +++ b/ovos_date_parser/res/uk-ua/date_time_test.json @@ -0,0 +1,23 @@ +{ + "test_nice_year": { + "1": { + "datetime_param": "1, 1, 31, 13, 22, 3", + "bc": "True", + "assertEqual": "один до нашої ери" + } + }, + "test_nice_date": { + "1": {"datetime_param": "2017, 1, 31, 0, 2, 3", "now": "None", "assertEqual": "вівторок, тридцять перше січня, дві тисячі сімнадцять"}, + "2": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2017, 1, 1, 0, 2, 3", "assertEqual": "неділя, четверте лютого, дві тисячі вісімнадцять"}, + "3": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 1, 1, 0, 2, 3", "assertEqual": "неділя, четверте лютого"}, + "4": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 1, 0, 2, 3", "assertEqual": "неділя, 
четверте"}, + "5": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 3, 0, 2, 3", "assertEqual": "завтра"}, + "6": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 4, 0, 2, 3", "assertEqual": "сьогодні"}, + "7": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2018, 2, 5, 0, 2, 3", "assertEqual": "вчора"}, + "8": {"datetime_param": "2018, 2, 4, 0, 2, 3", "now": "2019, 2, 6, 0, 2, 3", "assertEqual": "неділя, четверте лютого, дві тисячі вісімнадцять"} + }, + "test_nice_date_time": { + "1": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "False", "use_ampm": "True", "assertEqual": "вівторок, тридцять перше січня, дві тисячі сімнадцять, перша година дня двадцять два"}, + "2": {"datetime_param": "2017, 1, 31, 13, 22, 3", "now": "None", "use_24hour": "True", "use_ampm": "False", "assertEqual": "вівторок, тридцять перше січня, дві тисячі сімнадцять, тринадцять двадцять два"} + } +}