From a857f9c92e154f7e8cc75c277714ffe40bd9ad6f Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Mon, 28 May 2018 01:34:55 +0530 Subject: [PATCH 01/32] Test Fix --- tests/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 74ed309..469096a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -23,9 +23,9 @@ def test_uninstalled_mode(self): class TestGenerate(unittest.TestCase): def test_en(self): - wordform = apertium.generate('cat', 'en') + wordform = apertium.generate('en', '^cat$') self.assertEqual(wordform, 'cats') def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): - apertium.generate('cat', 'spa') + apertium.generate('spa', 'cat') From 2e961908b7c908318e3b1f94cbdf46159cb23def Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Mon, 28 May 2018 01:46:59 +0530 Subject: [PATCH 02/32] Issue Fix --- apertium/analysis/__init__.py | 2 +- apertium/generation/__init__.py | 21 ++++++++++++--------- tests/__init__.py | 17 +++++++++++++++-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 5fe33bc..66c6e47 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -15,7 +15,7 @@ def postproc_text(result): # type: (str) -> List[LexicalUnit] return lexical_units -def analyze(in_text, lang, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] +def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] """ runs apertium to analyze the input """ diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index ec910d1..d72b5ff 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -1,4 +1,4 @@ -import streamparser # noqa: F401 +from streamparser import parse, LexicalUnit # noqa: F401 import apertium from apertium.utils import to_alpha3_code, execute @@ -9,26 +9,29 @@ SEPARATOR = '[SEP]' -def preproc_text(in_text): # type: (str) -> Tuple[List[str], str] - if len(list(streamparser.parse(in_text))) == 0: - lexical_units = ['^%s$' % (in_text,)] - return lexical_units, SEPARATOR.join(lexical_units) +def preproc_text(in_text): # type: (str) -> List[LexicalUnit] + if len(list(parse(in_text))) == 0: + in_text = '^%s$' % (in_text,) + lexical_units = list(parse(in_text)) + else: + lexical_units = list(parse(in_text)) + return lexical_units -def postproc_text(lexical_units, result): # type: (List[str], str) -> str +def postproc_text(lexical_units, result): # type: (List[LexicalUnit], str) -> str return [(generation, lexical_units[i]) for (i, generation) in enumerate(result.split(SEPARATOR))][0][0] -def generate(in_text, lang, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] +def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] lang = to_alpha3_code(lang) if lang in apertium.generators: path, mode = apertium.generators[lang] commands = [['apertium', '-d', path, '-f', formatting, mode]] - lexical_units, to_generate = preproc_text(in_text) - result = execute(to_generate, commands) + lexical_units = preproc_text(in_text) + result = execute(in_text, commands) return postproc_text(lexical_units, result) else: raise apertium.ModeNotInstalled(lang) diff --git a/tests/__init__.py b/tests/__init__.py index 469096a..7bafde7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ class TestAnalyze(unittest.TestCase): def test_en(self): - lexical_units = apertium.analyze('cats', 'en') + lexical_units = apertium.analyze('en', 'cats') lexical_unit = lexical_units[0] self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) self.assertEqual(lexical_unit.wordform, 'cats') @@ -19,13 +19,26 @@ def test_en(self): def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): - apertium.analyze('cats', 'spa') + apertium.analyze('spa', 'cats') class TestGenerate(unittest.TestCase): def test_en(self): +<<<<<<< HEAD wordform = apertium.generate('en', '^cat$') self.assertEqual(wordform, 'cats') +======= + lexical_units = apertium.generate('en', '^cat$') + self.assertEqual(lexical_units, 'cats') + lexical_units = apertium.generate('en', '^cat$ ^cat$') + self.assertEqual(lexical_units, 'cats cats') + lexical_units = apertium.generate('en', 'cat') + self.assertEqual(lexical_units, 'cat') +>>>>>>> 5316eed... issue fix def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): apertium.generate('spa', 'cat') +<<<<<<< HEAD +======= + +>>>>>>> 5316eed... issue fix From 9f4da27117885591d5480a3d277647fb14362305 Mon Sep 17 00:00:00 2001 From: Arghya Bhatttacharya Date: Mon, 28 May 2018 01:50:22 +0530 Subject: [PATCH 03/32] Update __init__.py --- tests/__init__.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 7bafde7..674c686 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -23,22 +23,15 @@ def test_uninstalled_mode(self): class TestGenerate(unittest.TestCase): def test_en(self): -<<<<<<< HEAD wordform = apertium.generate('en', '^cat$') self.assertEqual(wordform, 'cats') -======= lexical_units = apertium.generate('en', '^cat$') self.assertEqual(lexical_units, 'cats') lexical_units = apertium.generate('en', '^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') lexical_units = apertium.generate('en', 'cat') self.assertEqual(lexical_units, 'cat') ->>>>>>> 5316eed... issue fix def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): apertium.generate('spa', 'cat') -<<<<<<< HEAD -======= - ->>>>>>> 5316eed... issue fix From 451a772695f877cd782e7d26a60a1ab145be8a65 Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Mon, 28 May 2018 02:28:46 +0530 Subject: [PATCH 04/32] generator module fix --- apertium/generation/__init__.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index d72b5ff..0bba505 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -6,8 +6,6 @@ if False: from typing import List, Union, Tuple # noqa: F401 -SEPARATOR = '[SEP]' - def preproc_text(in_text): # type: (str) -> List[LexicalUnit] if len(list(parse(in_text))) == 0: @@ -18,12 +16,6 @@ def preproc_text(in_text): # type: (str) -> List[LexicalUnit] return lexical_units -def postproc_text(lexical_units, result): # type: (List[LexicalUnit], str) -> str - return [(generation, lexical_units[i]) - for (i, generation) - in enumerate(result.split(SEPARATOR))][0][0] - - def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] lang = to_alpha3_code(lang) @@ -32,6 +24,6 @@ def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Unio commands = [['apertium', '-d', path, '-f', formatting, mode]] lexical_units = preproc_text(in_text) result = execute(in_text, commands) - return postproc_text(lexical_units, result) + return result else: raise apertium.ModeNotInstalled(lang) From 10263d9a659ab0b4acc2a40c0e170afbc3f56b86 Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Mon, 28 May 2018 02:35:19 +0530 Subject: [PATCH 05/32] minor fixes --- .appveyor.yml | 2 +- .travis.yml | 2 +- apertium/generation/__init__.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 0efd2b5..e9f63d3 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -37,7 +37,7 @@ install: build: false test_script: - coverage run -m unittest --verbose --buffer tests - - coverage report --show-missing --fail-under 80 --include 'apertium/**' + - coverage report --show-missing --fail-under 75 --include 'apertium/**' artifacts: - path: dist\* notifications: diff --git a/.travis.yml b/.travis.yml index 31ee5ff..fc5b7e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ script: fi; fi - coverage run -m unittest --verbose --buffer tests - - coverage report --show-missing --fail-under 80 --include 'apertium/**' + - coverage report --show-missing --fail-under 75 --include 'apertium/**' after_success: - coveralls notifications: diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 0bba505..4d53315 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -22,7 +22,6 @@ def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Unio if lang in apertium.generators: path, mode = apertium.generators[lang] commands = [['apertium', '-d', path, '-f', formatting, mode]] - lexical_units = preproc_text(in_text) result = execute(in_text, commands) return result else: From e0987ad6632b6f74a729316417691ae5916b4a3e Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Thu, 31 May 2018 12:53:16 +0530 Subject: [PATCH 06/32] translation module boilerplate --- apertium/translation/__init__.py | 52 +++++++++++ apertium/translation/utils.py | 151 +++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 apertium/translation/__init__.py create mode 100644 apertium/translation/utils.py diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py new file mode 100644 index 0000000..4c885a8 --- /dev/null +++ b/apertium/translation/__init__.py @@ -0,0 +1,52 @@ +import apertium +from apertium.utils import to_alpha3_code, execute +from utils import init_paths, init_pairs_graph, get_pipeline + + + +def get_pair_or_error(langpair, text_length): + try: + l1, l2 = map(to_alpha3_code, langpair.split('|')) + except ValueError: + print("Pair is Invalid") + return None + if '%s-%s' % (l1, l2) not in apertium.pairs: + print("Pair is not Installed") + return None + else: + return (l1, l2) + + +def get_format(format, deformat, reformat): + if format: + deformat = 'apertium-des' + format + reformat = 'apertium-re' + format + else: + if 'apertium-des' not in deformat: + deformat = 'apertium-des' + deformat + if 'apertium-re' not in reformat: + reformat = 'apertium-re' + reformat + + return deformat, reformat + + +def translate_and_respond(self, pair, pipeline, to_translate, mark_unknown, nosplit=False, deformat=True, reformat=True): + mark_unknown = mark_unknown in ['yes', 'true', '1'] + translated = translate(to_translate, nosplit, deformat, reformat) + val = maybe_strip_marks(mark_unknown, pair, translated) + return val + +def transalate(langpair, text, markUnknown='yes', format=None, deformat='html', reformat='html-noent'): + init_pairs_graph() + init_paths() + pair = get_pair_or_error(langpair), len(text) + if pair is not None: + pipeline = get_pipeline(pair) + deformat, reformat = get_format(format, deformat, reformat) + return translate_and_respond(pair, + pipeline, + text, + markUnknown, + nosplit=False, + deformat=deformat, + reformat=reformat) \ No newline at end of file diff --git a/apertium/translation/utils.py b/apertium/translation/utils.py new file mode 100644 index 0000000..ae43c36 --- /dev/null +++ b/apertium/translation/utils.py @@ -0,0 +1,151 @@ +import sys + +import apertium +from apertium.utils import to_alpha3_code, execute, start_pipeline, parse_mode_file + + +ParsedModes = namedtuple('ParsedModes', 'do_flush commands') + +def calculate_paths(start): + nodes = set() + for pair in map(lambda x: x.split('-'), apertium.pairs): + nodes.add(pair[0]) + nodes.add(pair[1]) + dists = {} + prevs = {} + dists[start] = 0 + + while nodes: + u = min(nodes, key=lambda u: dists.get(u, sys.maxsize)) + nodes.remove(u) + for v in apertium.pairs_graph.get(u, []): + if v in nodes: + other = dists.get(u, sys.maxsize) + 1 # TODO: weight(u, v) -- lower weight = better translation + if other < dists.get(v, sys.maxsize): + dists[v] = other + prevs[v] = u + + apertium.paths[start] = {} + for u in prevs: + prev = prevs[u] + path = [u] + while prev: + path.append(prev) + prev = prevs.get(prev) + apertium.paths[start][u] = list(reversed(path)) + + +def init_paths(): + for lang in apertium.pairs_graph: + calculate_paths(lang) + +def init_pairs_graph(): + for pair in apertium.pairs: + lang1, lang2 = pair.split('-') + if lang1 in apertium.pairs_graph: + apertium.pairs_graph[lang1].append(lang2) + else: + apertium.pairs_graph[lang1] = [lang2] + +def cmd_needs_z(cmd): + exceptions = r'^\s*(vislcg3|cg-mwesplit|hfst-tokeni[sz]e|divvun-suggest)' + return re.match(exceptions, cmd) is None + + + +def get_pipe_cmds(l1, l2): + if (l1, l2) not in apertium.pipeline_cmds: + mode_path = apertium.pairs['%s-%s' % (l1, l2)] + apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) + return apertium.pipeline_cmds[(l1, l2)] + +def should_start_pipe(l1, l2): + pipes = apertium.pipelines.get((l1, l2), []) + if pipes == []: + print('%s-%s not in pipelines of this process' + l1, l2) + return True + else: + min_p = pipes[0] + if len(pipes) < apertium.max_pipes_per_pair and min_p.users > apertium.max_users_per_pipe: + print('%s-%s has ≥%d users per pipe but only %d pipes', + l1, l2, min_p.users, len(pipes)) + return True + else: + return False + +def translate_nul_flush(to_translate, pipeline, unsafe_deformat, unsafe_reformat): + proc_in, proc_out = pipeline.inpipe, pipeline.outpipe + deformat, reformat = validate_formatters(unsafe_deformat, unsafe_reformat) + + if deformat: + proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) + proc_deformat.stdin.write(bytes(to_translate, 'utf-8')) + deformatted = proc_deformat.communicate()[0] + check_ret_code('Deformatter', proc_deformat) + else: + deformatted = bytes(to_translate, 'utf-8') + + proc_in.stdin.write(deformatted) + proc_in.stdin.write(bytes('\0', 'utf-8')) + # TODO: PipeIOStream has no flush, but seems to work anyway? + # proc_in.stdin.flush() + + # TODO: If the output has no \0, this hangs, locking the + # pipeline. If there's no way to put a timeout right here, we + # might need a timeout using Pipeline.use(), like servlet.py's + # cleanable but called *before* trying to translate anew + output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8')) + + if reformat: + proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) + proc_reformat.stdin.write(output) + result = proc_reformat.communicate()[0] + check_ret_code('Reformatter', proc_reformat) + else: + result = re.sub(rb'\0$', b'', output) + return result.decode('utf-8') + + + +class FlushingPipeline(Pipeline): + def __init__(self, commands): + self.inpipe, self.outpipe = start_pipeline(commands) + + def __del__(self): + print('shutting down FlushingPipeline that was used times') + + def translate(self, to_translate, nosplit=False, deformat=True, reformat=True): + if nosplit: + res = translate_nul_flush(to_translate, self, deformat, reformat) + return res + else: + all_split = split_for_translation(to_translate, n_users=self.users) + parts = [translate_nul_flush(part, self, deformat, reformat) + for part in all_split] + return ''.join(parts) + + +class SimplePipeline(Pipeline): + def __init__(self, commands): + self.commands = list(commands) + + def translate(self, to_translate, nosplit='ignored', deformat='ignored', reformat='ignored'): + res = execute(to_translate, self.commands) + return res + +def make_pipeline(modes_parsed): + if modes_parsed.do_flush: + return FlushingPipeline(modes_parsed.commands) + else: + return SimplePipeline(modes_parsed.commands) + +def get_pipeline(pair): + (l1, l2) = pair + if should_start_pipe(l1, l2): + print('Starting up a new pipeline for %s-%s …', l1, l2) + if pair not in apertium.pipelines: + apertium.pipelines[pair] = [] + p = make_pipeline(get_pipe_cmds(l1, l2)) + heapq.heappush(apertium.pipelines[pair], p) + return apertium.pipelines[pair][0] \ No newline at end of file From 00a210c21c93e31ae0fdee1cb81a25085597cc08 Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Thu, 31 May 2018 12:54:12 +0530 Subject: [PATCH 07/32] translation module boilerplate --- apertium/translation/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apertium/translation/utils.py b/apertium/translation/utils.py index ae43c36..3b2e8eb 100644 --- a/apertium/translation/utils.py +++ b/apertium/translation/utils.py @@ -52,7 +52,6 @@ def cmd_needs_z(cmd): return re.match(exceptions, cmd) is None - def get_pipe_cmds(l1, l2): if (l1, l2) not in apertium.pipeline_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] From 35747d131bb61c6bcc0ef5ea6f57a9e3d1a43874 Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Thu, 31 May 2018 12:55:03 +0530 Subject: [PATCH 08/32] changes for the translation module --- apertium/__init__.py | 11 +++++++++++ apertium/utils.py | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/apertium/__init__.py b/apertium/__init__.py index abaedd6..f99cb63 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,6 +1,7 @@ from apertium.mode_search import search_path from apertium.analysis import analyze # noqa: F401 from apertium.generation import generate # noqa: F401 +from apertium.translation import translate #noqa: F401 if False: from typing import List, Dict, Tuple # noqa: F401 @@ -9,9 +10,14 @@ class ModeNotInstalled(ValueError): pass +class PairNotInstalled() + def update_modes(pair_path): # type: (str) -> None modes = search_path(pair_path) + if modes['pair'] + for path, lang_src, lang_trg in modes['pair']: + pairs['%s-%s' % (lang_src, lang_trg)] = path if modes['analyzer']: for dirpath, modename, lang_pair in modes['analyzer']: analyzers[lang_pair] = (dirpath, modename) @@ -28,5 +34,10 @@ def append_pair_path(pair_path): # type: (str) -> None pair_paths = ['/usr/share/apertium', '/usr/local/share/apertium'] analyzers = {} # type: Dict[str, Tuple[str, str]] generators = {} # type: Dict[str, Tuple[str, str]] +pairs = {} +pairs_graph = {} +paths = {} +pipelines = {} +pipeline_cmds = {} for pair_path in pair_paths: update_modes(pair_path) diff --git a/apertium/utils.py b/apertium/utils.py index 9cb7e68..5de3dd7 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -26,3 +26,49 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str ) end, _ = procs[i].communicate(end) return end.decode() + +def start_pipeline(commands): + procs = [] # type: List[tornado.process.Subprocess] + for i, cmd in enumerate(commands): + if i == 0: + in_from = tornado.process.Subprocess.STREAM + else: + in_from = procs[-1].stdout + if i == len(commands) - 1: + out_from = tornado.process.Subprocess.STREAM + else: + out_from = PIPE + procs.append(tornado.process.Subprocess(cmd, + stdin=in_from, + stdout=out_from)) + return procs[0], procs[-1] + +def parse_mode_file(mode_path): + mode_str = open(mode_path, 'r').read().strip() + if mode_str: + if 'ca-oc@aran' in mode_str: + do_flush = False + modes_parentdir = os.path.dirname(os.path.dirname(mode_path)) + mode_name = os.path.splitext(os.path.basename(mode_path))[0] + commands = [[ + 'apertium', + '-f', 'html-noent', + # Get the _parent_ dir of the mode file: + '-d', modes_parentdir, + mode_name, + ]] + else: + do_flush = True + commands = [] + for cmd in mode_str.strip().split('|'): + # TODO: we should make language pairs install + # modes.xml instead; this is brittle (what if a path + # has | or ' in it?) + cmd = cmd.replace('$2', '').replace('$1', '-g') + if cmd_needs_z(cmd): + cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) + commands.append([c.strip("'") + for c in cmd.split()]) + return ParsedModes(do_flush, commands) + else: + raise Exception('Could not parse mode file %s', mode_path) \ No newline at end of file From 583263d13fea2a2987d7751d1d201e103c72a5ae Mon Sep 17 00:00:00 2001 From: arghyatiger Date: Sat, 2 Jun 2018 08:02:37 +0530 Subject: [PATCH 09/32] translation module boilerplate --- apertium/__init__.py | 6 +- apertium/mode_search.py | 2 + apertium/translation/__init__.py | 36 ++++---- apertium/translation/utils.py | 153 ++++--------------------------- apertium/utils.py | 17 ---- tests/__init__.py | 5 +- 6 files changed, 45 insertions(+), 174 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index f99cb63..0e38bf4 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -10,12 +10,13 @@ class ModeNotInstalled(ValueError): pass -class PairNotInstalled() +class PairNotInstalled(ValueError): + pass def update_modes(pair_path): # type: (str) -> None modes = search_path(pair_path) - if modes['pair'] + if modes['pair']: for path, lang_src, lang_trg in modes['pair']: pairs['%s-%s' % (lang_src, lang_trg)] = path if modes['analyzer']: @@ -24,6 +25,7 @@ def update_modes(pair_path): # type: (str) -> None if modes['generator']: for dirpath, modename, lang_pair in modes['generator']: generators[lang_pair] = (dirpath, modename) + print("These are the mode pairs", modes['pair']) def append_pair_path(pair_path): # type: (str) -> None diff --git a/apertium/mode_search.py b/apertium/mode_search.py index bbb6cf9..1236a67 100644 --- a/apertium/mode_search.py +++ b/apertium/mode_search.py @@ -35,10 +35,12 @@ def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, type_re = { 'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)), 'generator': re.compile(r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)), + 'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)), } modes = { 'analyzer': [], 'generator': [], + 'pair': [], } # type: Dict[str, List[Tuple[str, str, str]]] real_root = os.path.abspath(os.path.realpath(rootpath)) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 4c885a8..b86c8c3 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -1,7 +1,14 @@ +import sys + import apertium -from apertium.utils import to_alpha3_code, execute -from utils import init_paths, init_pairs_graph, get_pipeline +from apertium.utils import to_alpha3_code, execute, parse_mode_file + +def get_pipe_cmds(l1, l2): + if (l1, l2) not in apertium.pipeline_cmds: + mode_path = apertium.pairs['%s-%s' % (l1, l2)] + apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) + return apertium.pipeline_cmds[(l1, l2)] def get_pair_or_error(langpair, text_length): @@ -30,23 +37,12 @@ def get_format(format, deformat, reformat): return deformat, reformat -def translate_and_respond(self, pair, pipeline, to_translate, mark_unknown, nosplit=False, deformat=True, reformat=True): - mark_unknown = mark_unknown in ['yes', 'true', '1'] - translated = translate(to_translate, nosplit, deformat, reformat) - val = maybe_strip_marks(mark_unknown, pair, translated) - return val - -def transalate(langpair, text, markUnknown='yes', format=None, deformat='html', reformat='html-noent'): - init_pairs_graph() - init_paths() - pair = get_pair_or_error(langpair), len(text) +def translate(langpair, text, markUnknown='yes', format=None, deformat='html', reformat='html-noent'): + pair = get_pair_or_error(langpair, len(text)) if pair is not None: - pipeline = get_pipeline(pair) + (l1, l2) = pair + cmds = list(get_pipe_cmds(l1, l2).commands) deformat, reformat = get_format(format, deformat, reformat) - return translate_and_respond(pair, - pipeline, - text, - markUnknown, - nosplit=False, - deformat=deformat, - reformat=reformat) \ No newline at end of file + res = execute(to_translate, cmds) + return res + \ No newline at end of file diff --git a/apertium/translation/utils.py b/apertium/translation/utils.py index 3b2e8eb..9f9d362 100644 --- a/apertium/translation/utils.py +++ b/apertium/translation/utils.py @@ -1,55 +1,7 @@ import sys import apertium -from apertium.utils import to_alpha3_code, execute, start_pipeline, parse_mode_file - - -ParsedModes = namedtuple('ParsedModes', 'do_flush commands') - -def calculate_paths(start): - nodes = set() - for pair in map(lambda x: x.split('-'), apertium.pairs): - nodes.add(pair[0]) - nodes.add(pair[1]) - dists = {} - prevs = {} - dists[start] = 0 - - while nodes: - u = min(nodes, key=lambda u: dists.get(u, sys.maxsize)) - nodes.remove(u) - for v in apertium.pairs_graph.get(u, []): - if v in nodes: - other = dists.get(u, sys.maxsize) + 1 # TODO: weight(u, v) -- lower weight = better translation - if other < dists.get(v, sys.maxsize): - dists[v] = other - prevs[v] = u - - apertium.paths[start] = {} - for u in prevs: - prev = prevs[u] - path = [u] - while prev: - path.append(prev) - prev = prevs.get(prev) - apertium.paths[start][u] = list(reversed(path)) - - -def init_paths(): - for lang in apertium.pairs_graph: - calculate_paths(lang) - -def init_pairs_graph(): - for pair in apertium.pairs: - lang1, lang2 = pair.split('-') - if lang1 in apertium.pairs_graph: - apertium.pairs_graph[lang1].append(lang2) - else: - apertium.pairs_graph[lang1] = [lang2] - -def cmd_needs_z(cmd): - exceptions = r'^\s*(vislcg3|cg-mwesplit|hfst-tokeni[sz]e|divvun-suggest)' - return re.match(exceptions, cmd) is None +from apertium.utils import parse_mode_file def get_pipe_cmds(l1, l2): @@ -58,93 +10,28 @@ def get_pipe_cmds(l1, l2): apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) return apertium.pipeline_cmds[(l1, l2)] -def should_start_pipe(l1, l2): - pipes = apertium.pipelines.get((l1, l2), []) - if pipes == []: - print('%s-%s not in pipelines of this process' - l1, l2) - return True - else: - min_p = pipes[0] - if len(pipes) < apertium.max_pipes_per_pair and min_p.users > apertium.max_users_per_pipe: - print('%s-%s has ≥%d users per pipe but only %d pipes', - l1, l2, min_p.users, len(pipes)) - return True - else: - return False -def translate_nul_flush(to_translate, pipeline, unsafe_deformat, unsafe_reformat): - proc_in, proc_out = pipeline.inpipe, pipeline.outpipe - deformat, reformat = validate_formatters(unsafe_deformat, unsafe_reformat) - - if deformat: - proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) - proc_deformat.stdin.write(bytes(to_translate, 'utf-8')) - deformatted = proc_deformat.communicate()[0] - check_ret_code('Deformatter', proc_deformat) +def get_pair_or_error(langpair, text_length): + try: + l1, l2 = map(to_alpha3_code, langpair.split('|')) + except ValueError: + print("Pair is Invalid") + return None + if '%s-%s' % (l1, l2) not in apertium.pairs: + print("Pair is not Installed") + return None else: - deformatted = bytes(to_translate, 'utf-8') - - proc_in.stdin.write(deformatted) - proc_in.stdin.write(bytes('\0', 'utf-8')) - # TODO: PipeIOStream has no flush, but seems to work anyway? - # proc_in.stdin.flush() - - # TODO: If the output has no \0, this hangs, locking the - # pipeline. If there's no way to put a timeout right here, we - # might need a timeout using Pipeline.use(), like servlet.py's - # cleanable but called *before* trying to translate anew - output = yield gen.Task(proc_out.stdout.read_until, bytes('\0', 'utf-8')) - - if reformat: - proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) - proc_reformat.stdin.write(output) - result = proc_reformat.communicate()[0] - check_ret_code('Reformatter', proc_reformat) - else: - result = re.sub(rb'\0$', b'', output) - return result.decode('utf-8') - - - -class FlushingPipeline(Pipeline): - def __init__(self, commands): - self.inpipe, self.outpipe = start_pipeline(commands) - - def __del__(self): - print('shutting down FlushingPipeline that was used times') - - def translate(self, to_translate, nosplit=False, deformat=True, reformat=True): - if nosplit: - res = translate_nul_flush(to_translate, self, deformat, reformat) - return res - else: - all_split = split_for_translation(to_translate, n_users=self.users) - parts = [translate_nul_flush(part, self, deformat, reformat) - for part in all_split] - return ''.join(parts) - - -class SimplePipeline(Pipeline): - def __init__(self, commands): - self.commands = list(commands) + return (l1, l2) - def translate(self, to_translate, nosplit='ignored', deformat='ignored', reformat='ignored'): - res = execute(to_translate, self.commands) - return res -def make_pipeline(modes_parsed): - if modes_parsed.do_flush: - return FlushingPipeline(modes_parsed.commands) +def get_format(format, deformat, reformat): + if format: + deformat = 'apertium-des' + format + reformat = 'apertium-re' + format else: - return SimplePipeline(modes_parsed.commands) + if 'apertium-des' not in deformat: + deformat = 'apertium-des' + deformat + if 'apertium-re' not in reformat: + reformat = 'apertium-re' + reformat -def get_pipeline(pair): - (l1, l2) = pair - if should_start_pipe(l1, l2): - print('Starting up a new pipeline for %s-%s …', l1, l2) - if pair not in apertium.pipelines: - apertium.pipelines[pair] = [] - p = make_pipeline(get_pipe_cmds(l1, l2)) - heapq.heappush(apertium.pipelines[pair], p) - return apertium.pipelines[pair][0] \ No newline at end of file + return deformat, reformat \ No newline at end of file diff --git a/apertium/utils.py b/apertium/utils.py index 5de3dd7..7b8d7eb 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -16,7 +16,6 @@ def to_alpha3_code(code): # type: (str) -> str else: return iso639_codes_inverse[code] if code in iso639_codes_inverse else code - def execute(inp, commands): # type: (str, List[List[str]]) -> str procs = [] end = inp.encode() @@ -27,22 +26,6 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str end, _ = procs[i].communicate(end) return end.decode() -def start_pipeline(commands): - procs = [] # type: List[tornado.process.Subprocess] - for i, cmd in enumerate(commands): - if i == 0: - in_from = tornado.process.Subprocess.STREAM - else: - in_from = procs[-1].stdout - if i == len(commands) - 1: - out_from = tornado.process.Subprocess.STREAM - else: - out_from = PIPE - procs.append(tornado.process.Subprocess(cmd, - stdin=in_from, - stdout=out_from)) - return procs[0], procs[-1] - def parse_mode_file(mode_path): mode_str = open(mode_path, 'r').read().strip() if mode_str: diff --git a/tests/__init__.py b/tests/__init__.py index 674c686..d00aac1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -25,10 +25,11 @@ class TestGenerate(unittest.TestCase): def test_en(self): wordform = apertium.generate('en', '^cat$') self.assertEqual(wordform, 'cats') - lexical_units = apertium.generate('en', '^cat$') - self.assertEqual(lexical_units, 'cats') + + def test_parsing(self): lexical_units = apertium.generate('en', '^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') + lexical_units = apertium.generate('en', 'cat') self.assertEqual(lexical_units, 'cat') From c7b489944ad3935b958de411d726141dff480271 Mon Sep 17 00:00:00 2001 From: vagrant Date: Sat, 2 Jun 2018 05:48:30 +0000 Subject: [PATCH 10/32] Translation intial module --- apertium/translation/__init__.py | 11 ++++++++-- apertium/translation/utils.py | 37 -------------------------------- apertium/utils.py | 9 ++++++++ 3 files changed, 18 insertions(+), 39 deletions(-) delete mode 100644 apertium/translation/utils.py diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index b86c8c3..c14568b 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -37,12 +37,19 @@ def get_format(format, deformat, reformat): return deformat, reformat -def translate(langpair, text, markUnknown='yes', format=None, deformat='html', reformat='html-noent'): +def maybe_strip_marks(mark_unknown, pair, translated): + if mark_unknown: + return translated + else: + return re.sub(self.unknown_mark_re, r'\1', translated) + +def translate(langpair, text, markUnknown='no', format=None, deformat='txt', reformat='txt'): pair = get_pair_or_error(langpair, len(text)) if pair is not None: (l1, l2) = pair cmds = list(get_pipe_cmds(l1, l2).commands) deformat, reformat = get_format(format, deformat, reformat) - res = execute(to_translate, cmds) + res = execute(text, cmds) + val = maybe_strip_marks(markUnknown, pair, res) return res \ No newline at end of file diff --git a/apertium/translation/utils.py b/apertium/translation/utils.py deleted file mode 100644 index 9f9d362..0000000 --- a/apertium/translation/utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import sys - -import apertium -from apertium.utils import parse_mode_file - - -def get_pipe_cmds(l1, l2): - if (l1, l2) not in apertium.pipeline_cmds: - mode_path = apertium.pairs['%s-%s' % (l1, l2)] - apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) - return apertium.pipeline_cmds[(l1, l2)] - - -def get_pair_or_error(langpair, text_length): - try: - l1, l2 = map(to_alpha3_code, langpair.split('|')) - except ValueError: - print("Pair is Invalid") - return None - if '%s-%s' % (l1, l2) not in apertium.pairs: - print("Pair is not Installed") - return None - else: - return (l1, l2) - - -def get_format(format, deformat, reformat): - if format: - deformat = 'apertium-des' + format - reformat = 'apertium-re' + format - else: - if 'apertium-des' not in deformat: - deformat = 'apertium-des' + deformat - if 'apertium-re' not in reformat: - reformat = 'apertium-re' + reformat - - return deformat, reformat \ No newline at end of file diff --git a/apertium/utils.py b/apertium/utils.py index 7b8d7eb..3b27b3d 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -1,4 +1,7 @@ import subprocess +import re +from collections import namedtuple + from apertium.iso639 import iso_639_codes @@ -26,6 +29,12 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str end, _ = procs[i].communicate(end) return end.decode() +def cmd_needs_z(cmd): + exceptions = r'^\s*(vislcg3|cg-mwesplit|hfst-tokeni[sz]e|divvun-suggest)' + return re.match(exceptions, cmd) is None + +ParsedModes = namedtuple('ParsedModes', 'do_flush commands') + def parse_mode_file(mode_path): mode_str = open(mode_path, 'r').read().strip() if mode_str: From f12f5e3e3a93492c52967d485a9a7cabdf20f600 Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 6 Jun 2018 05:44:18 +0000 Subject: [PATCH 11/32] Translation module added --- apertium/__init__.py | 18 +++-- apertium/mode_search.py | 12 ++-- apertium/translation/__init__.py | 111 ++++++++++++++++++++++++------- apertium/utils.py | 35 +++++++--- tests/__init__.py | 15 ++++- 5 files changed, 142 insertions(+), 49 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index 0e38bf4..0e544ae 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,7 +1,9 @@ from apertium.mode_search import search_path from apertium.analysis import analyze # noqa: F401 from apertium.generation import generate # noqa: F401 -from apertium.translation import translate #noqa: F401 +from apertium.translation import translate # noqa: F401 +from apertium.utils import ParsedModes # noqa: F401 + if False: from typing import List, Dict, Tuple # noqa: F401 @@ -10,10 +12,15 @@ class ModeNotInstalled(ValueError): pass + class PairNotInstalled(ValueError): pass +class ProcessFailure(Exception): + pass + + def update_modes(pair_path): # type: (str) -> None modes = search_path(pair_path) if modes['pair']: @@ -25,7 +32,7 @@ def update_modes(pair_path): # type: (str) -> None if modes['generator']: for dirpath, modename, lang_pair in modes['generator']: generators[lang_pair] = (dirpath, modename) - print("These are the mode pairs", modes['pair']) + # print("These are the mode pairs", modes['pair']) def append_pair_path(pair_path): # type: (str) -> None @@ -36,10 +43,7 @@ def append_pair_path(pair_path): # type: (str) -> None pair_paths = ['/usr/share/apertium', '/usr/local/share/apertium'] analyzers = {} # type: Dict[str, Tuple[str, str]] generators = {} # type: Dict[str, Tuple[str, str]] -pairs = {} -pairs_graph = {} -paths = {} -pipelines = {} -pipeline_cmds = {} +pairs = {} # type: Dict[str, str] +pipeline_cmds = {} # type: Dict[Tuple[str, str], ParsedModes] for pair_path in pair_paths: update_modes(pair_path) diff --git a/apertium/mode_search.py b/apertium/mode_search.py index 1236a67..28bbd4d 100644 --- a/apertium/mode_search.py +++ b/apertium/mode_search.py @@ -33,9 +33,12 @@ def is_loop(dirpath, rootpath, real_root=None): # type: (str, str, Union[None, def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, List[Tuple[str, str, str]]] lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?' type_re = { - 'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)), - 'generator': re.compile(r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)), - 'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)), + 'analyzer': re.compile( + r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)), + 'generator': re.compile( + r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)), + 'pair': re.compile( + r'({0})-({0})\.mode'.format(lang_code)), } modes = { 'analyzer': [], @@ -55,7 +58,8 @@ def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, if m: if mtype != 'pair': modename = m.group(1) # e.g. en-es-anmorph - langlist = [to_alpha3_code(l) for l in m.group(2).split('-')] + langlist = [to_alpha3_code(l) + for l in m.group(2).split('-')] lang_pair = '-'.join(langlist) # e.g. en-es dir_of_modes = os.path.dirname(dirpath) mode = (dir_of_modes, diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index c14568b..654bcfc 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -1,55 +1,116 @@ -import sys +import re +import subprocess # noqa: F401 +from subprocess import Popen, PIPE -import apertium -from apertium.utils import to_alpha3_code, execute, parse_mode_file +if False: + from typing import List, Dict, Tuple, Union, Optional, NamedTuple # noqa: F401 +import apertium # noqa: F401 +from apertium.utils import to_alpha3_code, execute, parse_mode_file, ParsedModes # noqa: F401 -def get_pipe_cmds(l1, l2): + +def get_pipe_cmds(l1, l2): # type: (str, str) -> ParsedModes if (l1, l2) not in apertium.pipeline_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] - apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) + apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) # type: ignore return apertium.pipeline_cmds[(l1, l2)] -def get_pair_or_error(langpair, text_length): +def get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, str]] try: - l1, l2 = map(to_alpha3_code, langpair.split('|')) + l1, l2 = map(to_alpha3_code, [l1, l2]) except ValueError: - print("Pair is Invalid") + print('Pair is Invalid') return None if '%s-%s' % (l1, l2) not in apertium.pairs: - print("Pair is not Installed") + print('Pair is not Installed') return None else: return (l1, l2) -def get_format(format, deformat, reformat): +def get_format(format, deformat, reformat): # type: (Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] if format: deformat = 'apertium-des' + format reformat = 'apertium-re' + format else: - if 'apertium-des' not in deformat: - deformat = 'apertium-des' + deformat - if 'apertium-re' not in reformat: - reformat = 'apertium-re' + reformat + if 'apertium-des' not in deformat: # type: ignore + deformat = 'apertium-des' + deformat # type: ignore + if 'apertium-re' not in reformat: # type: ignore + reformat = 'apertium-re' + reformat # type: ignore return deformat, reformat -def maybe_strip_marks(mark_unknown, pair, translated): - if mark_unknown: - return translated +def check_ret_code(name, proc): # type: (str, subprocess.Popen) -> None + if proc.returncode != 0: + raise apertium.ProcessFailure('%s failed, exit code %s', name, proc.returncode) + + +def validate_formatters(deformat, reformat): # type: (Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] + def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, object] + if elt in lst: + return elt else: - return re.sub(self.unknown_mark_re, r'\1', translated) + return lst[0] + # First is fallback: + deformatters = [ + 'apertium-deshtml', + 'apertium-destxt', + 'apertium-desrtf', + False] + reformatters = [ + 'apertium-rehtml-noent', + 'apertium-rehtml', + 'apertium-retxt', + 'apertium-rertf', + False] + return valid1(deformat, deformatters), valid1(reformat, reformatters) + + +def get_deformat(deformat, text): # type: (str, str) -> str + if deformat: + proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) + proc_deformat.stdin.write(bytes(text, 'utf-8')) + deformatted = proc_deformat.communicate()[0] + deformatted = deformatted.decode() + check_ret_code('Deformatter', proc_deformat) + else: + deformatted = bytes(text, 'utf-8') + deformatted = str(deformatted) + return deformatted # type: ignore -def translate(langpair, text, markUnknown='no', format=None, deformat='txt', reformat='txt'): - pair = get_pair_or_error(langpair, len(text)) + +def get_reformat(reformat, text): # type: (str, str) -> str + if reformat: + proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) + proc_reformat.stdin.write(bytes(text, 'utf-8')) + result = proc_reformat.communicate()[0] + check_ret_code('Reformatter', proc_reformat) + else: + result = re.sub(rb'\0$', b'', text) # type: ignore + return result # type: ignore + + +def translate( + l1, + l2, + text, + markunknown='no', + format=None, + deformat='txt', + reformat='txt'): # type: (str, str, str, str, Union[str, None], str, str) -> str + pair = get_pair_or_error(l1, l2) if pair is not None: (l1, l2) = pair cmds = list(get_pipe_cmds(l1, l2).commands) - deformat, reformat = get_format(format, deformat, reformat) - res = execute(text, cmds) - val = maybe_strip_marks(markUnknown, pair, res) - return res - \ No newline at end of file + unsafe_deformat, unsafe_reformat = get_format( + format, deformat, reformat) + deformat, reformat = validate_formatters( # type: ignore + unsafe_deformat, unsafe_reformat) + deformatted = get_deformat(deformat, text) + output = execute(deformatted, cmds) + result = get_reformat(reformat, output).strip() + return result.decode() # type: ignore + else: + raise apertium.PairNotInstalled() diff --git a/apertium/utils.py b/apertium/utils.py index 3b27b3d..78bd43f 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -1,41 +1,56 @@ import subprocess import re +import os + from collections import namedtuple +if False: + from typing import List, Dict, Tuple, Union # noqa: F401 -from apertium.iso639 import iso_639_codes +from apertium.iso639 import iso_639_codes # noqa: F401 if False: - import typing # noqa:F401 - from typing import List # noqa:F401 + import typing # noqa:F401 + from typing import List # noqa:F401 iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} +ParsedModes = namedtuple('ParsedModes', 'do_flush commands') + + def to_alpha3_code(code): # type: (str) -> str if '_' in code: code, variant = code.split('_') - return '%s_%s' % ((iso639_codes_inverse[code], variant) if code in iso639_codes_inverse else (code, variant)) + return '%s_%s' % ((iso639_codes_inverse[code], + variant) if code in iso639_codes_inverse else ( + code, + variant)) else: return iso639_codes_inverse[code] if code in iso639_codes_inverse else code + def execute(inp, commands): # type: (str, List[List[str]]) -> str procs = [] end = inp.encode() for i, command in enumerate(commands): procs.append( - subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE), + subprocess.Popen( + command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE), ) end, _ = procs[i].communicate(end) - return end.decode() + return end.decode('utf-8') -def cmd_needs_z(cmd): + +def cmd_needs_z(cmd): # type: (str) -> bool exceptions = r'^\s*(vislcg3|cg-mwesplit|hfst-tokeni[sz]e|divvun-suggest)' return re.match(exceptions, cmd) is None -ParsedModes = namedtuple('ParsedModes', 'do_flush commands') -def parse_mode_file(mode_path): +def parse_mode_file(mode_path): # type: (str) -> Union[ParsedModes, Exception] + # print("This is the type of mode path", type(mode_path)) mode_str = open(mode_path, 'r').read().strip() if mode_str: if 'ca-oc@aran' in mode_str: @@ -63,4 +78,4 @@ def parse_mode_file(mode_path): for c in cmd.split()]) return ParsedModes(do_flush, commands) else: - raise Exception('Could not parse mode file %s', mode_path) \ No newline at end of file + raise Exception('Could not parse mode file %s', mode_path) diff --git a/tests/__init__.py b/tests/__init__.py index d00aac1..a86fdf5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,13 +7,15 @@ base_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '..') sys.path.append(base_path) -import apertium # noqa: E402 +import apertium # noqa: E402 + class TestAnalyze(unittest.TestCase): def test_en(self): lexical_units = apertium.analyze('en', 'cats') lexical_unit = lexical_units[0] - self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) + self.assertListEqual(lexical_unit.readings, [ + [SReading(baseform='cat', tags=['n', 'pl'])]]) self.assertEqual(lexical_unit.wordform, 'cats') self.assertEqual(lexical_unit.knownness, known) @@ -21,6 +23,7 @@ def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): apertium.analyze('spa', 'cats') + class TestGenerate(unittest.TestCase): def test_en(self): wordform = apertium.generate('en', '^cat$') @@ -29,10 +32,16 @@ def test_en(self): def test_parsing(self): lexical_units = apertium.generate('en', '^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') - + lexical_units = apertium.generate('en', 'cat') self.assertEqual(lexical_units, 'cat') def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): apertium.generate('spa', 'cat') + + +class TestTranslate(unittest.TestCase): + def test_en_spa(self): + translated = apertium.translate('eng', 'spa', 'I love you') + self.assertEqual(translated, 'Te quieres') From 17d64b8d41b92149725509798d9e0799327b1455 Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 6 Jun 2018 05:49:08 +0000 Subject: [PATCH 12/32] README.md update --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eaecd58..7abac5a 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ In [2]: apertium.analyze('cats', 'en') Out[2]: cats/cat ``` - ### Generation - Performing Morphological Generation +### Generation +Performing Morphological Generation ```python In [1]: import apertium In [2]: apertium.generate('cat', 'en') @@ -38,3 +38,11 @@ One can also install modes by providing the path to the lang-data using this sim In [1]: import apertium as a In [2]: a.append_pair_path('..') ``` + +### Translation +Performing Translations +```python +In [3]: import apertium +In [4]: apertium.translate('eng', 'spa', 'I love you') +Out[4]: 'Te quieres' +``` From 5f5db041182edc6622482011f26dc098a32d33d1 Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 6 Jun 2018 12:46:00 +0000 Subject: [PATCH 13/32] Changes requested in translation PR --- .travis.yml | 2 +- README.md | 6 ++--- apertium/__init__.py | 6 ----- apertium/mode_search.py | 12 ++++------ apertium/translation/__init__.py | 39 +++++++++++++------------------- apertium/utils.py | 29 +++++------------------- tests/__init__.py | 7 +++--- 7 files changed, 33 insertions(+), 68 deletions(-) diff --git a/.travis.yml b/.travis.yml index fc5b7e4..0661e55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ script: fi; fi - coverage run -m unittest --verbose --buffer tests - - coverage report --show-missing --fail-under 75 --include 'apertium/**' + - coverage report --show-missing --fail-under 85 --include 'apertium/**' after_success: - coveralls notifications: diff --git a/README.md b/README.md index d8e5d1d..9b26c68 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ In [2]: apertium.append_pair_path('..') ### Translation Performing Translations ```python -In [3]: import apertium -In [4]: apertium.translate('eng', 'spa', 'I love you') -Out[4]: 'Te quieres' +In [1]: import apertium +In [2]: apertium.translate('eng', 'spa', 'I love you') +Out[1]: 'Te quieres' ``` diff --git a/apertium/__init__.py b/apertium/__init__.py index 0e544ae..610d53f 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -13,10 +13,6 @@ class ModeNotInstalled(ValueError): pass -class PairNotInstalled(ValueError): - pass - - class ProcessFailure(Exception): pass @@ -32,7 +28,6 @@ def update_modes(pair_path): # type: (str) -> None if modes['generator']: for dirpath, modename, lang_pair in modes['generator']: generators[lang_pair] = (dirpath, modename) - # print("These are the mode pairs", modes['pair']) def append_pair_path(pair_path): # type: (str) -> None @@ -44,6 +39,5 @@ def append_pair_path(pair_path): # type: (str) -> None analyzers = {} # type: Dict[str, Tuple[str, str]] generators = {} # type: Dict[str, Tuple[str, str]] pairs = {} # type: Dict[str, str] -pipeline_cmds = {} # type: Dict[Tuple[str, str], ParsedModes] for pair_path in pair_paths: update_modes(pair_path) diff --git a/apertium/mode_search.py b/apertium/mode_search.py index 28bbd4d..1236a67 100644 --- a/apertium/mode_search.py +++ b/apertium/mode_search.py @@ -33,12 +33,9 @@ def is_loop(dirpath, rootpath, real_root=None): # type: (str, str, Union[None, def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, List[Tuple[str, str, str]]] lang_code = r'[a-z]{2,3}(?:_[A-Za-z]+)?' type_re = { - 'analyzer': re.compile( - r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)), - 'generator': re.compile( - r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)), - 'pair': re.compile( - r'({0})-({0})\.mode'.format(lang_code)), + 'analyzer': re.compile(r'(({0}(-{0})?)-(an)?mor(ph)?)\.mode'.format(lang_code)), + 'generator': re.compile(r'(({0}(-{0})?)-gener[A-z]*)\.mode'.format(lang_code)), + 'pair': re.compile(r'({0})-({0})\.mode'.format(lang_code)), } modes = { 'analyzer': [], @@ -58,8 +55,7 @@ def search_path(rootpath, include_pairs=True): # type: (str, bool) -> Dict[str, if m: if mtype != 'pair': modename = m.group(1) # e.g. en-es-anmorph - langlist = [to_alpha3_code(l) - for l in m.group(2).split('-')] + langlist = [to_alpha3_code(l) for l in m.group(2).split('-')] lang_pair = '-'.join(langlist) # e.g. en-es dir_of_modes = os.path.dirname(dirpath) mode = (dir_of_modes, diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 654bcfc..65ce381 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -1,5 +1,4 @@ import re -import subprocess # noqa: F401 from subprocess import Popen, PIPE if False: @@ -9,21 +8,22 @@ from apertium.utils import to_alpha3_code, execute, parse_mode_file, ParsedModes # noqa: F401 +pipeline_cmds = {} # type: Dict[Tuple[str, str], ParsedModes] + + def get_pipe_cmds(l1, l2): # type: (str, str) -> ParsedModes - if (l1, l2) not in apertium.pipeline_cmds: + if (l1, l2) not in pipeline_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] - apertium.pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) # type: ignore - return apertium.pipeline_cmds[(l1, l2)] + pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) + return pipeline_cmds[(l1, l2)] def get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, str]] try: l1, l2 = map(to_alpha3_code, [l1, l2]) except ValueError: - print('Pair is Invalid') return None if '%s-%s' % (l1, l2) not in apertium.pairs: - print('Pair is not Installed') return None else: return (l1, l2) @@ -42,7 +42,7 @@ def get_format(format, deformat, reformat): # type: (Union[str, None], Union[st return deformat, reformat -def check_ret_code(name, proc): # type: (str, subprocess.Popen) -> None +def check_ret_code(name, proc): # type: (str, Popen) -> None if proc.returncode != 0: raise apertium.ProcessFailure('%s failed, exit code %s', name, proc.returncode) @@ -58,13 +58,15 @@ def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, ob 'apertium-deshtml', 'apertium-destxt', 'apertium-desrtf', - False] + False, + ] reformatters = [ 'apertium-rehtml-noent', 'apertium-rehtml', 'apertium-retxt', 'apertium-rertf', - False] + False, + ] return valid1(deformat, deformatters), valid1(reformat, reformatters) @@ -92,25 +94,16 @@ def get_reformat(reformat, text): # type: (str, str) -> str return result # type: ignore -def translate( - l1, - l2, - text, - markunknown='no', - format=None, - deformat='txt', - reformat='txt'): # type: (str, str, str, str, Union[str, None], str, str) -> str +def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (str, str, str, bool, Union[str, None], str, str) -> str pair = get_pair_or_error(l1, l2) if pair is not None: - (l1, l2) = pair + l1, l2 = pair cmds = list(get_pipe_cmds(l1, l2).commands) - unsafe_deformat, unsafe_reformat = get_format( - format, deformat, reformat) - deformat, reformat = validate_formatters( # type: ignore - unsafe_deformat, unsafe_reformat) + unsafe_deformat, unsafe_reformat = get_format(format, deformat, reformat) + deformat, reformat = validate_formatters(unsafe_deformat, unsafe_reformat) # type: ignore deformatted = get_deformat(deformat, text) output = execute(deformatted, cmds) result = get_reformat(reformat, output).strip() return result.decode() # type: ignore else: - raise apertium.PairNotInstalled() + raise apertium.ModeNotInstalled() diff --git a/apertium/utils.py b/apertium/utils.py index 78bd43f..33dbdd6 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -9,9 +9,6 @@ from apertium.iso639 import iso_639_codes # noqa: F401 -if False: - import typing # noqa:F401 - from typing import List # noqa:F401 iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} @@ -22,10 +19,7 @@ def to_alpha3_code(code): # type: (str) -> str if '_' in code: code, variant = code.split('_') - return '%s_%s' % ((iso639_codes_inverse[code], - variant) if code in iso639_codes_inverse else ( - code, - variant)) + return '%s_%s' % ((iso639_codes_inverse[code], variant) if code in iso639_codes_inverse else (code, variant)) else: return iso639_codes_inverse[code] if code in iso639_codes_inverse else code @@ -35,22 +29,13 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str end = inp.encode() for i, command in enumerate(commands): procs.append( - subprocess.Popen( - command, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE), + subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE), ) end, _ = procs[i].communicate(end) - return end.decode('utf-8') - - -def cmd_needs_z(cmd): # type: (str) -> bool - exceptions = r'^\s*(vislcg3|cg-mwesplit|hfst-tokeni[sz]e|divvun-suggest)' - return re.match(exceptions, cmd) is None + return end.decode() -def parse_mode_file(mode_path): # type: (str) -> Union[ParsedModes, Exception] - # print("This is the type of mode path", type(mode_path)) +def parse_mode_file(mode_path): # type: (str) -> ParsedModes mode_str = open(mode_path, 'r').read().strip() if mode_str: if 'ca-oc@aran' in mode_str: @@ -72,10 +57,8 @@ def parse_mode_file(mode_path): # type: (str) -> Union[ParsedModes, Exception] # modes.xml instead; this is brittle (what if a path # has | or ' in it?) cmd = cmd.replace('$2', '').replace('$1', '-g') - if cmd_needs_z(cmd): - cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) - commands.append([c.strip("'") - for c in cmd.split()]) + cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) + commands.append([c.strip("'") for c in cmd.split()]) return ParsedModes(do_flush, commands) else: raise Exception('Could not parse mode file %s', mode_path) diff --git a/tests/__init__.py b/tests/__init__.py index e3c9d61..c9b25c2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -14,8 +14,7 @@ class TestAnalyze(unittest.TestCase): def test_en(self): lexical_units = apertium.analyze('en', 'cats') lexical_unit = lexical_units[0] - self.assertListEqual(lexical_unit.readings, [ - [SReading(baseform='cat', tags=['n', 'pl'])]]) + self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) self.assertEqual(lexical_unit.wordform, 'cats') self.assertEqual(lexical_unit.knownness, known) @@ -44,5 +43,5 @@ def test_uninstalled_mode(self): class TestTranslate(unittest.TestCase): def test_en_spa(self): - translated = apertium.translate('eng', 'spa', 'I love you') - self.assertEqual(translated, 'Te quieres') + translated = apertium.translate('eng', 'spa', 'cats') + self.assertEqual(translated, 'Gatos') From dd928cf1fbbfec191fb58f03fff485533840de1d Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 6 Jun 2018 12:57:51 +0000 Subject: [PATCH 14/32] remove flush code --- apertium/translation/__init__.py | 4 ++-- apertium/utils.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 65ce381..d9e4ecf 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -79,8 +79,8 @@ def get_deformat(deformat, text): # type: (str, str) -> str check_ret_code('Deformatter', proc_deformat) else: deformatted = bytes(text, 'utf-8') - deformatted = str(deformatted) - return deformatted # type: ignore + res = str(deformatted) + return res def get_reformat(reformat, text): # type: (str, str) -> str diff --git a/apertium/utils.py b/apertium/utils.py index 33dbdd6..76b7f25 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -13,7 +13,7 @@ iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} -ParsedModes = namedtuple('ParsedModes', 'do_flush commands') +ParsedModes = namedtuple('ParsedModes', 'commands') def to_alpha3_code(code): # type: (str) -> str @@ -39,7 +39,6 @@ def parse_mode_file(mode_path): # type: (str) -> ParsedModes mode_str = open(mode_path, 'r').read().strip() if mode_str: if 'ca-oc@aran' in mode_str: - do_flush = False modes_parentdir = os.path.dirname(os.path.dirname(mode_path)) mode_name = os.path.splitext(os.path.basename(mode_path))[0] commands = [[ @@ -50,7 +49,6 @@ def parse_mode_file(mode_path): # type: (str) -> ParsedModes mode_name, ]] else: - do_flush = True commands = [] for cmd in mode_str.strip().split('|'): # TODO: we should make language pairs install @@ -59,6 +57,6 @@ def parse_mode_file(mode_path): # type: (str) -> ParsedModes cmd = cmd.replace('$2', '').replace('$1', '-g') cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) commands.append([c.strip("'") for c in cmd.split()]) - return ParsedModes(do_flush, commands) + return ParsedModes(commands) else: raise Exception('Could not parse mode file %s', mode_path) From 2fbe068d9bde073affebc494d338ff6c90934b7b Mon Sep 17 00:00:00 2001 From: vagrant Date: Sat, 9 Jun 2018 06:57:26 +0000 Subject: [PATCH 15/32] Change in pipeline architecture --- README.md | 2 +- apertium/__init__.py | 1 - apertium/analysis/__init__.py | 17 +++++++++++++---- apertium/generation/__init__.py | 16 ++++++++++++---- apertium/translation/__init__.py | 11 ++++++----- apertium/utils.py | 8 ++------ 6 files changed, 34 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9b26c68..b282314 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,5 @@ Performing Translations ```python In [1]: import apertium In [2]: apertium.translate('eng', 'spa', 'I love you') -Out[1]: 'Te quieres' +Out[2]: 'Te quieres' ``` diff --git a/apertium/__init__.py b/apertium/__init__.py index 610d53f..adfb925 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -2,7 +2,6 @@ from apertium.analysis import analyze # noqa: F401 from apertium.generation import generate # noqa: F401 from apertium.translation import translate # noqa: F401 -from apertium.utils import ParsedModes # noqa: F401 if False: diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 66c6e47..8956cc8 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -1,10 +1,19 @@ from streamparser import parse, LexicalUnit # noqa: F401 import apertium -from apertium.utils import to_alpha3_code, execute +from apertium.utils import to_alpha3_code, execute, parse_mode_file if False: - from typing import List, Union # noqa: F401 + from typing import List, Union, Dict # noqa: F401 + +pipeline_cmds = {} # type: Dict[str, List[List[str]]] + + +def get_pipe_cmds(lang): # type: (str) -> List[List[str]] + if lang not in pipeline_cmds: + mode_path, mode = apertium.analyzers[lang] + pipeline_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return pipeline_cmds[lang] def postproc_text(result): # type: (str) -> List[LexicalUnit] @@ -22,8 +31,8 @@ def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[L lang = to_alpha3_code(lang) if lang in apertium.analyzers: - path, mode = apertium.analyzers[lang] - commands = [['apertium', '-d', path, '-f', formatting, mode]] + in_text = in_text + commands = list(get_pipe_cmds(lang)) result = execute(in_text, commands) return postproc_text(result) else: diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 1ba8b20..6e93229 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -1,18 +1,26 @@ from streamparser import parse, LexicalUnit # noqa: F401 import apertium -from apertium.utils import to_alpha3_code, execute +from apertium.utils import to_alpha3_code, execute, parse_mode_file if False: - from typing import List, Union, Tuple # noqa: F401 + from typing import List, Union, Tuple, Dict # noqa: F401 + +pipeline_cmds = {} # type: Dict[str, List[List[str]]] + + +def get_pipe_cmds(lang): # type: (str) -> List[List[str]] + if lang not in pipeline_cmds: + mode_path, mode = apertium.generators[lang] + pipeline_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return pipeline_cmds[lang] def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] lang = to_alpha3_code(lang) if lang in apertium.generators: - path, mode = apertium.generators[lang] - commands = [['apertium', '-d', path, '-f', formatting, mode]] + commands = list(get_pipe_cmds(lang)) result = execute(in_text, commands) return result else: diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index d9e4ecf..e1067b9 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -1,17 +1,18 @@ import re +import subprocess from subprocess import Popen, PIPE if False: from typing import List, Dict, Tuple, Union, Optional, NamedTuple # noqa: F401 import apertium # noqa: F401 -from apertium.utils import to_alpha3_code, execute, parse_mode_file, ParsedModes # noqa: F401 +from apertium.utils import to_alpha3_code, execute, parse_mode_file # noqa: F401 -pipeline_cmds = {} # type: Dict[Tuple[str, str], ParsedModes] +pipeline_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] -def get_pipe_cmds(l1, l2): # type: (str, str) -> ParsedModes +def get_pipe_cmds(l1, l2): # type: (str, str) -> List[List[str]] if (l1, l2) not in pipeline_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) @@ -44,7 +45,7 @@ def get_format(format, deformat, reformat): # type: (Union[str, None], Union[st def check_ret_code(name, proc): # type: (str, Popen) -> None if proc.returncode != 0: - raise apertium.ProcessFailure('%s failed, exit code %s', name, proc.returncode) + raise subprocess.CalledProcessError() # type: ignore def validate_formatters(deformat, reformat): # type: (Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] @@ -98,7 +99,7 @@ def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', ref pair = get_pair_or_error(l1, l2) if pair is not None: l1, l2 = pair - cmds = list(get_pipe_cmds(l1, l2).commands) + cmds = list(get_pipe_cmds(l1, l2)) unsafe_deformat, unsafe_reformat = get_format(format, deformat, reformat) deformat, reformat = validate_formatters(unsafe_deformat, unsafe_reformat) # type: ignore deformatted = get_deformat(deformat, text) diff --git a/apertium/utils.py b/apertium/utils.py index 76b7f25..2904550 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -2,7 +2,6 @@ import re import os -from collections import namedtuple if False: from typing import List, Dict, Tuple, Union # noqa: F401 @@ -13,9 +12,6 @@ iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} -ParsedModes = namedtuple('ParsedModes', 'commands') - - def to_alpha3_code(code): # type: (str) -> str if '_' in code: code, variant = code.split('_') @@ -35,7 +31,7 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str return end.decode() -def parse_mode_file(mode_path): # type: (str) -> ParsedModes +def parse_mode_file(mode_path): # type: (str) -> List[List[str]] mode_str = open(mode_path, 'r').read().strip() if mode_str: if 'ca-oc@aran' in mode_str: @@ -57,6 +53,6 @@ def parse_mode_file(mode_path): # type: (str) -> ParsedModes cmd = cmd.replace('$2', '').replace('$1', '-g') cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) commands.append([c.strip("'") for c in cmd.split()]) - return ParsedModes(commands) + return commands else: raise Exception('Could not parse mode file %s', mode_path) From 7bba995d46c8f395936047919fcac985c85d3f4a Mon Sep 17 00:00:00 2001 From: vagrant Date: Sun, 10 Jun 2018 06:26:17 +0000 Subject: [PATCH 16/32] Translation module update --- apertium/__init__.py | 6 +- apertium/analysis/__init__.py | 52 +++++---- apertium/generation/__init__.py | 31 +++--- apertium/translation/__init__.py | 182 ++++++++++++++++--------------- tests/__init__.py | 23 ++-- 5 files changed, 156 insertions(+), 138 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index adfb925..94150c2 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,7 +1,7 @@ from apertium.mode_search import search_path -from apertium.analysis import analyze # noqa: F401 -from apertium.generation import generate # noqa: F401 -from apertium.translation import translate # noqa: F401 +from apertium.analysis import Analyzer # noqa: F401 +from apertium.generation import Generator # noqa: F401 +from apertium.translation import Translator # noqa: F401 if False: diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 8956cc8..48c7cff 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -6,34 +6,38 @@ if False: from typing import List, Union, Dict # noqa: F401 -pipeline_cmds = {} # type: Dict[str, List[List[str]]] +class Analyzer: -def get_pipe_cmds(lang): # type: (str) -> List[List[str]] - if lang not in pipeline_cmds: - mode_path, mode = apertium.analyzers[lang] - pipeline_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') - return pipeline_cmds[lang] + def __init__(self): + self.analyzer_cmds = {} # type: Dict[str, List[List[str]]] -def postproc_text(result): # type: (str) -> List[LexicalUnit] - """ - postprocesses the input - """ - lexical_units = list(parse(result)) - return lexical_units + def _get_commands(self, lang): # type: (str) -> List[List[str]] + if lang not in self.analyzer_cmds: + mode_path, mode = apertium.analyzers[lang] + self.analyzer_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return self.analyzer_cmds[lang] -def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] - """ - runs apertium to analyze the input - """ - lang = to_alpha3_code(lang) + def _postproc_text(self, result): # type: (str) -> List[LexicalUnit] + """ + postprocesses the input + """ + lexical_units = list(parse(result)) + return lexical_units + + + def analyze(self, lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] + """ + runs apertium to analyze the input + """ + lang = to_alpha3_code(lang) + + if lang in apertium.analyzers: + commands = list(self._get_commands(lang)) + result = execute(in_text, commands) + return self._postproc_text(result) + else: + raise apertium.ModeNotInstalled(lang) - if lang in apertium.analyzers: - in_text = in_text - commands = list(get_pipe_cmds(lang)) - result = execute(in_text, commands) - return postproc_text(result) - else: - raise apertium.ModeNotInstalled(lang) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 6e93229..dc9e3ab 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -6,22 +6,25 @@ if False: from typing import List, Union, Tuple, Dict # noqa: F401 -pipeline_cmds = {} # type: Dict[str, List[List[str]]] +class Generator: + def __init__(self): + self.generator_cmds = {} # type: Dict[str, List[List[str]]] -def get_pipe_cmds(lang): # type: (str) -> List[List[str]] - if lang not in pipeline_cmds: - mode_path, mode = apertium.generators[lang] - pipeline_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') - return pipeline_cmds[lang] + def _get_commands(self, lang): # type: (str) -> List[List[str]] + if lang not in self.generator_cmds: + mode_path, mode = apertium.generators[lang] + self.generator_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return self.generator_cmds[lang] -def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] - lang = to_alpha3_code(lang) - if lang in apertium.generators: - commands = list(get_pipe_cmds(lang)) - result = execute(in_text, commands) - return result - else: - raise apertium.ModeNotInstalled(lang) + def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] + lang = to_alpha3_code(lang) + + if lang in apertium.generators: + commands = list(self._get_commands(lang)) + result = execute(in_text, commands) + return result + else: + raise apertium.ModeNotInstalled(lang) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index e1067b9..669c54e 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -8,103 +8,105 @@ import apertium # noqa: F401 from apertium.utils import to_alpha3_code, execute, parse_mode_file # noqa: F401 +class Translator: -pipeline_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] + def __init__(self): + self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] -def get_pipe_cmds(l1, l2): # type: (str, str) -> List[List[str]] - if (l1, l2) not in pipeline_cmds: - mode_path = apertium.pairs['%s-%s' % (l1, l2)] - pipeline_cmds[(l1, l2)] = parse_mode_file(mode_path) - return pipeline_cmds[(l1, l2)] + def _get_commands(l1, l2): # type: (str, str) -> List[List[str]] + if (l1, l2) not in translation_cmds: + mode_path = apertium.pairs['%s-%s' % (l1, l2)] + translation_cmds[(l1, l2)] = parse_mode_file(mode_path) + return translation_cmds[(l1, l2)] -def get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, str]] - try: - l1, l2 = map(to_alpha3_code, [l1, l2]) - except ValueError: - return None - if '%s-%s' % (l1, l2) not in apertium.pairs: - return None - else: - return (l1, l2) - - -def get_format(format, deformat, reformat): # type: (Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] - if format: - deformat = 'apertium-des' + format - reformat = 'apertium-re' + format - else: - if 'apertium-des' not in deformat: # type: ignore - deformat = 'apertium-des' + deformat # type: ignore - if 'apertium-re' not in reformat: # type: ignore - reformat = 'apertium-re' + reformat # type: ignore - - return deformat, reformat + def _get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, str]] + try: + l1, l2 = map(to_alpha3_code, [l1, l2]) + except ValueError: + return None + if '%s-%s' % (l1, l2) not in apertium.pairs: + return None + else: + return (l1, l2) -def check_ret_code(name, proc): # type: (str, Popen) -> None - if proc.returncode != 0: - raise subprocess.CalledProcessError() # type: ignore + def _get_format(format, deformat, reformat): # type: (Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] + if format: + deformat = 'apertium-des' + format + reformat = 'apertium-re' + format + else: + if 'apertium-des' not in deformat: # type: ignore + deformat = 'apertium-des' + deformat # type: ignore + if 'apertium-re' not in reformat: # type: ignore + reformat = 'apertium-re' + reformat # type: ignore + + return deformat, reformat + + + def _check_ret_code(name, proc): # type: (str, Popen) -> None + if proc.returncode != 0: + raise subprocess.CalledProcessError() # type: ignore + + + def _validate_formatters(deformat, reformat): # type: (Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] + def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, object] + if elt in lst: + return elt + else: + return lst[0] + # First is fallback: + deformatters = [ + 'apertium-deshtml', + 'apertium-destxt', + 'apertium-desrtf', + False, + ] + reformatters = [ + 'apertium-rehtml-noent', + 'apertium-rehtml', + 'apertium-retxt', + 'apertium-rertf', + False, + ] + return valid1(deformat, deformatters), valid1(reformat, reformatters) + + + def _get_deformat(deformat, text): # type: (str, str) -> str + if deformat: + proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) + proc_deformat.stdin.write(bytes(text, 'utf-8')) + deformatted = proc_deformat.communicate()[0] + deformatted = deformatted.decode() + self._check_ret_code('Deformatter', proc_deformat) + else: + deformatted = bytes(text, 'utf-8') + res = str(deformatted) + return res -def validate_formatters(deformat, reformat): # type: (Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] - def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, object] - if elt in lst: - return elt + def _get_reformat(reformat, text): # type: (str, str) -> str + if reformat: + proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) + proc_reformat.stdin.write(bytes(text, 'utf-8')) + result = proc_reformat.communicate()[0] + self._check_ret_code('Reformatter', proc_reformat) + else: + result = re.sub(rb'\0$', b'', text) # type: ignore + return result # type: ignore + + + def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (str, str, str, bool, Union[str, None], str, str) -> str + pair = self._get_pair_or_error(l1, l2) + if pair is not None: + l1, l2 = pair + cmds = list(self._get_commands(l1, l2)) + unsafe_deformat, unsafe_reformat = self._get_format(format, deformat, reformat) + deformat, reformat = self._validate_formatters(unsafe_deformat, unsafe_reformat) # type: ignore + deformatted = self._get_deformat(deformat, text) + output = execute(deformatted, cmds) + result = self._get_reformat(reformat, output).strip() + return result.decode() # type: ignore else: - return lst[0] - # First is fallback: - deformatters = [ - 'apertium-deshtml', - 'apertium-destxt', - 'apertium-desrtf', - False, - ] - reformatters = [ - 'apertium-rehtml-noent', - 'apertium-rehtml', - 'apertium-retxt', - 'apertium-rertf', - False, - ] - return valid1(deformat, deformatters), valid1(reformat, reformatters) - - -def get_deformat(deformat, text): # type: (str, str) -> str - if deformat: - proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) - proc_deformat.stdin.write(bytes(text, 'utf-8')) - deformatted = proc_deformat.communicate()[0] - deformatted = deformatted.decode() - check_ret_code('Deformatter', proc_deformat) - else: - deformatted = bytes(text, 'utf-8') - res = str(deformatted) - return res - - -def get_reformat(reformat, text): # type: (str, str) -> str - if reformat: - proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) - proc_reformat.stdin.write(bytes(text, 'utf-8')) - result = proc_reformat.communicate()[0] - check_ret_code('Reformatter', proc_reformat) - else: - result = re.sub(rb'\0$', b'', text) # type: ignore - return result # type: ignore - - -def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (str, str, str, bool, Union[str, None], str, str) -> str - pair = get_pair_or_error(l1, l2) - if pair is not None: - l1, l2 = pair - cmds = list(get_pipe_cmds(l1, l2)) - unsafe_deformat, unsafe_reformat = get_format(format, deformat, reformat) - deformat, reformat = validate_formatters(unsafe_deformat, unsafe_reformat) # type: ignore - deformatted = get_deformat(deformat, text) - output = execute(deformatted, cmds) - result = get_reformat(reformat, output).strip() - return result.decode() # type: ignore - else: - raise apertium.ModeNotInstalled() + raise apertium.ModeNotInstalled() diff --git a/tests/__init__.py b/tests/__init__.py index c9b25c2..f545922 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,8 +11,11 @@ class TestAnalyze(unittest.TestCase): + + analyzer = apertium.Analyzer() + def test_en(self): - lexical_units = apertium.analyze('en', 'cats') + lexical_units = analyzer.analyze('en', 'cats') lexical_unit = lexical_units[0] self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) self.assertEqual(lexical_unit.wordform, 'cats') @@ -20,28 +23,34 @@ def test_en(self): def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): - apertium.analyze('spa', 'cats') + analyzer.analyze('spa', 'cats') class TestGenerate(unittest.TestCase): + + generator = apertium.Generator() + def test_single(self): - wordform = apertium.generate('en', '^cat$') + wordform = generator.generate('en', '^cat$') self.assertEqual(wordform, 'cats') def test_multiple(self): - lexical_units = apertium.generate('en', '^cat$ ^cat$') + lexical_units = generator.generate('en', '^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') def test_bare(self): - lexical_units = apertium.generate('en', 'cat') + lexical_units = generator.generate('en', 'cat') self.assertEqual(lexical_units, 'cat') def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): - apertium.generate('spa', 'cat') + generator.generate('spa', 'cat') class TestTranslate(unittest.TestCase): + + translator = apertium.Translator() + def test_en_spa(self): - translated = apertium.translate('eng', 'spa', 'cats') + translated = translator.translate('eng', 'spa', 'cats') self.assertEqual(translated, 'Gatos') From 8dcfd27aee792c64f0cc444e932d629cd0687c1b Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 11 Jun 2018 09:27:54 +0000 Subject: [PATCH 17/32] Translation stable commit --- apertium/analysis/__init__.py | 36 +++++++++++------------- apertium/generation/__init__.py | 28 +++++++++---------- apertium/translation/__init__.py | 48 +++++++++++++++----------------- tests/__init__.py | 26 ++++++++--------- 4 files changed, 66 insertions(+), 72 deletions(-) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 48c7cff..45922a1 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -9,35 +9,31 @@ class Analyzer: - def __init__(self): + def __init__(self, lang): # type: (Analyzer, str) -> None self.analyzer_cmds = {} # type: Dict[str, List[List[str]]] + self.lang = to_alpha3_code(lang) # type: str + if self.lang not in apertium.analyzers: + raise apertium.ModeNotInstalled(self.lang) + else: + self.path, self.mode = apertium.analyzers[self.lang] + def _get_commands(self): # type: (Analyzer) -> List[List[str]] + if self.lang not in self.analyzer_cmds: + mode_path, mode = apertium.analyzers[self.lang] + self.analyzer_cmds[self.lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return self.analyzer_cmds[self.lang] - def _get_commands(self, lang): # type: (str) -> List[List[str]] - if lang not in self.analyzer_cmds: - mode_path, mode = apertium.analyzers[lang] - self.analyzer_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') - return self.analyzer_cmds[lang] - - - def _postproc_text(self, result): # type: (str) -> List[LexicalUnit] + def _postproc_text(self, result): # type: (Analyzer, str) -> List[LexicalUnit] """ postprocesses the input """ lexical_units = list(parse(result)) return lexical_units - - def analyze(self, lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] + def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> List[LexicalUnit] """ runs apertium to analyze the input """ - lang = to_alpha3_code(lang) - - if lang in apertium.analyzers: - commands = list(self._get_commands(lang)) - result = execute(in_text, commands) - return self._postproc_text(result) - else: - raise apertium.ModeNotInstalled(lang) - + commands = [['apertium', '-d', self.path, '-f', formatting, self.mode]] + result = execute(in_text, commands) + return self._postproc_text(result) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index dc9e3ab..4641568 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -6,25 +6,25 @@ if False: from typing import List, Union, Tuple, Dict # noqa: F401 + class Generator: - def __init__(self): + def __init__(self, lang): # type: (Generator, str) -> None self.generator_cmds = {} # type: Dict[str, List[List[str]]] + self.lang = lang # type: str + def _get_commands(self): # type: (Generator) -> List[List[str]] + if self.lang not in self.generator_cmds: + mode_path, mode = apertium.generators[self.lang] + self.generator_cmds[self.lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') + return self.generator_cmds[self.lang] - def _get_commands(self, lang): # type: (str) -> List[List[str]] - if lang not in self.generator_cmds: - mode_path, mode = apertium.generators[lang] - self.generator_cmds[lang] = parse_mode_file(mode_path+'/modes/'+mode+'.mode') - return self.generator_cmds[lang] - - - def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] - lang = to_alpha3_code(lang) + def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -> Union[str, List[str]] + self.lang = to_alpha3_code(self.lang) - if lang in apertium.generators: - commands = list(self._get_commands(lang)) + if self.lang in apertium.generators: + commands = list(self._get_commands()) result = execute(in_text, commands) - return result + return result.rstrip('\x00') else: - raise apertium.ModeNotInstalled(lang) + raise apertium.ModeNotInstalled(self.lang) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 669c54e..35e7be8 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -8,22 +8,23 @@ import apertium # noqa: F401 from apertium.utils import to_alpha3_code, execute, parse_mode_file # noqa: F401 + class Translator: - def __init__(self): + def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] + self.l1 = l1 + self.l2 = l2 - - def _get_commands(l1, l2): # type: (str, str) -> List[List[str]] - if (l1, l2) not in translation_cmds: + def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] + if (l1, l2) not in self.translation_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] - translation_cmds[(l1, l2)] = parse_mode_file(mode_path) - return translation_cmds[(l1, l2)] - + self.translation_cmds[(l1, l2)] = parse_mode_file(mode_path) + return self.translation_cmds[(l1, l2)] - def _get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, str]] + def _get_pair_or_error(self): # type: (Translator) -> Union[None, Tuple[str, str]] try: - l1, l2 = map(to_alpha3_code, [l1, l2]) + l1, l2 = map(to_alpha3_code, [self.l1, self.l2]) except ValueError: return None if '%s-%s' % (l1, l2) not in apertium.pairs: @@ -31,8 +32,8 @@ def _get_pair_or_error(l1, l2): # type: (str, str) -> Union[None, Tuple[str, st else: return (l1, l2) - - def _get_format(format, deformat, reformat): # type: (Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] + def _get_format(self, format, deformat, reformat): + # type: (Translator, Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] if format: deformat = 'apertium-des' + format reformat = 'apertium-re' + format @@ -44,13 +45,12 @@ def _get_format(format, deformat, reformat): # type: (Union[str, None], Union[s return deformat, reformat - - def _check_ret_code(name, proc): # type: (str, Popen) -> None + def _check_ret_code(self, name, proc): # type: (Translator, str, Popen) -> None if proc.returncode != 0: raise subprocess.CalledProcessError() # type: ignore - - def _validate_formatters(deformat, reformat): # type: (Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] + def _validate_formatters(self, deformat, reformat): + # type: (Translator, Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, object] if elt in lst: return elt @@ -72,8 +72,7 @@ def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, ob ] return valid1(deformat, deformatters), valid1(reformat, reformatters) - - def _get_deformat(deformat, text): # type: (str, str) -> str + def _get_deformat(self, deformat, text): # type: (Translator, str, str) -> str if deformat: proc_deformat = Popen(deformat, stdin=PIPE, stdout=PIPE) proc_deformat.stdin.write(bytes(text, 'utf-8')) @@ -85,8 +84,7 @@ def _get_deformat(deformat, text): # type: (str, str) -> str res = str(deformatted) return res - - def _get_reformat(reformat, text): # type: (str, str) -> str + def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str if reformat: proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) proc_reformat.stdin.write(bytes(text, 'utf-8')) @@ -96,17 +94,17 @@ def _get_reformat(reformat, text): # type: (str, str) -> str result = re.sub(rb'\0$', b'', text) # type: ignore return result # type: ignore - - def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (str, str, str, bool, Union[str, None], str, str) -> str - pair = self._get_pair_or_error(l1, l2) + def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): + # type: (Translator, str, bool, Union[str, None], str, str) -> str + pair = self._get_pair_or_error() if pair is not None: l1, l2 = pair cmds = list(self._get_commands(l1, l2)) unsafe_deformat, unsafe_reformat = self._get_format(format, deformat, reformat) - deformat, reformat = self._validate_formatters(unsafe_deformat, unsafe_reformat) # type: ignore - deformatted = self._get_deformat(deformat, text) + deformater, reformater = self._validate_formatters(unsafe_deformat, unsafe_reformat) + deformatted = self._get_deformat(str(deformater), text) output = execute(deformatted, cmds) - result = self._get_reformat(reformat, output).strip() + result = self._get_reformat(str(reformater), output).strip() return result.decode() # type: ignore else: raise apertium.ModeNotInstalled() diff --git a/tests/__init__.py b/tests/__init__.py index f545922..def950f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,10 +12,9 @@ class TestAnalyze(unittest.TestCase): - analyzer = apertium.Analyzer() - def test_en(self): - lexical_units = analyzer.analyze('en', 'cats') + analyzer = apertium.Analyzer('en') + lexical_units = analyzer.analyze('cats') lexical_unit = lexical_units[0] self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) self.assertEqual(lexical_unit.wordform, 'cats') @@ -23,34 +22,35 @@ def test_en(self): def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): - analyzer.analyze('spa', 'cats') + analyzer = apertium.Analyzer('spa') class TestGenerate(unittest.TestCase): - generator = apertium.Generator() - def test_single(self): - wordform = generator.generate('en', '^cat$') + generator = apertium.Generator('en') + wordform = generator.generate('^cat$') self.assertEqual(wordform, 'cats') def test_multiple(self): - lexical_units = generator.generate('en', '^cat$ ^cat$') + generator = apertium.Generator('en') + lexical_units = generator.generate('^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') def test_bare(self): - lexical_units = generator.generate('en', 'cat') + generator = apertium.Generator('en') + lexical_units = generator.generate('cat') self.assertEqual(lexical_units, 'cat') def test_uninstalled_mode(self): + generator = apertium.Generator('spa') with self.assertRaises(apertium.ModeNotInstalled): - generator.generate('spa', 'cat') + generator.generate('cat') class TestTranslate(unittest.TestCase): - translator = apertium.Translator() - def test_en_spa(self): - translated = translator.translate('eng', 'spa', 'cats') + translator = apertium.Translator('eng', 'spa') + translated = translator.translate('cats') self.assertEqual(translated, 'Gatos') From 59b625ebb22208496ee5bf7e142320056234091f Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 11 Jun 2018 09:31:37 +0000 Subject: [PATCH 18/32] README.md update --- README.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index b282314..7fa3f14 100644 --- a/README.md +++ b/README.md @@ -20,17 +20,19 @@ Performing Morphological Analysis ```python In [1]: import apertium -In [2]: apertium.analyze('en', 'cats') -Out[2]: cats/cat +In [2]: a = apertium.Analyzer('en') +In [3]: a.analyze('cats') +Out[3]: [cats/cat, ./.] ``` ### Generation Performing Morphological Generation - ```python +```python In [1]: import apertium -In [2]: apertium.generate('en', '^cat$') -Out[2]: 'cats' - ``` +In [2]: g = apertium.Generator('en') +In [3]: g.generate('^cat$') +Out[3]: 'cats' +``` ### Installing more modes from other language data One can also install modes by providing the path to the lang-data using this simple function @@ -43,6 +45,7 @@ In [2]: apertium.append_pair_path('..') Performing Translations ```python In [1]: import apertium -In [2]: apertium.translate('eng', 'spa', 'I love you') -Out[2]: 'Te quieres' +In [2]: t = apertium.Translator('eng', 'spa') +In [3]: t.translate('cats') +Out[3]: 'Gatos' ``` From 763fcdcaa5299b99feecf44ebe05e09b0b5eca50 Mon Sep 17 00:00:00 2001 From: vagrant Date: Sun, 17 Jun 2018 10:35:09 +0000 Subject: [PATCH 19/32] Added independent analyze and generate functions --- README.md | 18 ++++++++++++++++-- apertium/__init__.py | 4 ++-- apertium/analysis/__init__.py | 11 +++++++++++ apertium/generation/__init__.py | 12 ++++++++++++ tests/__init__.py | 33 ++++++++++++++++++++++++++++----- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7fa3f14..24bed9b 100644 --- a/README.md +++ b/README.md @@ -18,22 +18,36 @@ ### Analysis Performing Morphological Analysis +Method 1: One can create ```Analyzer``` objects on which ```analyze()``` function can be run. ```python In [1]: import apertium In [2]: a = apertium.Analyzer('en') In [3]: a.analyze('cats') Out[3]: [cats/cat, ./.] ``` - +Method 2: Alternatively, the library provides an option to directly run the ```analyze``` method. +```python +In [1]: import apertium +In [2]: apertium.analyze('en', 'cats') +Out[2]: cats/cat +``` + ### Generation Performing Morphological Generation +Method 1: Just like the ```Analyzer```, One can create ```Generator``` objects on which ```generate()``` function can be run. ```python In [1]: import apertium In [2]: g = apertium.Generator('en') In [3]: g.generate('^cat$') Out[3]: 'cats' ``` - +Method 2: Running ```generate()``` directly. +```python +In [1]: import apertium +In [2]: apertium.generate('en', '^cat$') +Out[2]: 'cats' +``` + ### Installing more modes from other language data One can also install modes by providing the path to the lang-data using this simple function ```python diff --git a/apertium/__init__.py b/apertium/__init__.py index 94150c2..ffdc9d2 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,6 +1,6 @@ from apertium.mode_search import search_path -from apertium.analysis import Analyzer # noqa: F401 -from apertium.generation import Generator # noqa: F401 +from apertium.analysis import Analyzer, analyze # noqa: F401 +from apertium.generation import Generator, generate # noqa: F401 from apertium.translation import Translator # noqa: F401 diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 45922a1..4b14e4f 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -37,3 +37,14 @@ def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> L commands = [['apertium', '-d', self.path, '-f', formatting, self.mode]] result = execute(in_text, commands) return self._postproc_text(result) + + +def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] + if lang not in apertium.analyzers: + raise apertium.ModeNotInstalled(lang) + else: + path, mode = apertium.analyzers[lang] + commands = [['apertium', '-d', path, '-f', formatting, mode]] + result = execute(in_text, commands) + lexical_units = list(parse(result)) + return lexical_units diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 4641568..3e3128b 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -28,3 +28,15 @@ def generate(self, in_text, formatting='none'): # type: (Generator, str, str) - return result.rstrip('\x00') else: raise apertium.ModeNotInstalled(self.lang) + + +def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] + lang = to_alpha3_code(lang) + + if lang in apertium.generators: + path, mode = apertium.generators[lang] + commands = [['apertium', '-d', path, '-f', formatting, mode]] + result = execute(in_text, commands) + return result + else: + raise apertium.ModeNotInstalled(lang) diff --git a/tests/__init__.py b/tests/__init__.py index def950f..f4b1d16 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,7 +12,7 @@ class TestAnalyze(unittest.TestCase): - def test_en(self): + def test_analyzer_en(self): analyzer = apertium.Analyzer('en') lexical_units = analyzer.analyze('cats') lexical_unit = lexical_units[0] @@ -20,6 +20,13 @@ def test_en(self): self.assertEqual(lexical_unit.wordform, 'cats') self.assertEqual(lexical_unit.knownness, known) + def test_analyze_en(self): + lexical_units = apertium.analyze('eng', 'cats') + lexical_unit = lexical_units[0] + self.assertListEqual(lexical_unit.readings, [[SReading(baseform='cat', tags=['n', 'pl'])]]) + self.assertEqual(lexical_unit.wordform, 'cats') + self.assertEqual(lexical_unit.knownness, known) + def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): analyzer = apertium.Analyzer('spa') @@ -27,26 +34,42 @@ def test_uninstalled_mode(self): class TestGenerate(unittest.TestCase): - def test_single(self): + def test_generator_single(self): generator = apertium.Generator('en') wordform = generator.generate('^cat$') self.assertEqual(wordform, 'cats') - def test_multiple(self): + def test_generator_multiple(self): generator = apertium.Generator('en') lexical_units = generator.generate('^cat$ ^cat$') self.assertEqual(lexical_units, 'cats cats') - def test_bare(self): + def test_generator_bare(self): generator = apertium.Generator('en') lexical_units = generator.generate('cat') self.assertEqual(lexical_units, 'cat') - def test_uninstalled_mode(self): + def test_generator_uninstalled_mode(self): generator = apertium.Generator('spa') with self.assertRaises(apertium.ModeNotInstalled): generator.generate('cat') + def test_single(self): + wordform = apertium.generate('en', '^cat$') + self.assertEqual(wordform, 'cats') + + def test_multiple(self): + lexical_units = apertium.generate('en', '^cat$ ^cat$') + self.assertEqual(lexical_units, 'cats cats') + + def test_bare(self): + lexical_units = apertium.generate('en', 'cat') + self.assertEqual(lexical_units, 'cat') + + def test_uninstalled_mode(self): + with self.assertRaises(apertium.ModeNotInstalled): + apertium.generate('spa', 'cat') + class TestTranslate(unittest.TestCase): From fe61e50ab9522fb62952d55910909656a766b621 Mon Sep 17 00:00:00 2001 From: Arghya Bhatttacharya Date: Sun, 17 Jun 2018 16:06:09 +0530 Subject: [PATCH 20/32] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 24bed9b..95b1c4b 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ ### Analysis Performing Morphological Analysis + Method 1: One can create ```Analyzer``` objects on which ```analyze()``` function can be run. ```python In [1]: import apertium @@ -34,6 +35,7 @@ Out[2]: cats/cat ### Generation Performing Morphological Generation + Method 1: Just like the ```Analyzer```, One can create ```Generator``` objects on which ```generate()``` function can be run. ```python In [1]: import apertium From e6da4ededbb566255cc3a36890926a64cea4ec0c Mon Sep 17 00:00:00 2001 From: vagrant Date: Tue, 19 Jun 2018 08:33:48 +0000 Subject: [PATCH 21/32] changes requested in review --- apertium/analysis/__init__.py | 1 - apertium/generation/__init__.py | 1 - apertium/translation/__init__.py | 17 ++++++++--------- apertium/utils.py | 32 ++++++++++---------------------- tests/__init__.py | 3 --- 5 files changed, 18 insertions(+), 36 deletions(-) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 4b14e4f..730713e 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -8,7 +8,6 @@ class Analyzer: - def __init__(self, lang): # type: (Analyzer, str) -> None self.analyzer_cmds = {} # type: Dict[str, List[List[str]]] self.lang = to_alpha3_code(lang) # type: str diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 3e3128b..4ce32d8 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -8,7 +8,6 @@ class Generator: - def __init__(self, lang): # type: (Generator, str) -> None self.generator_cmds = {} # type: Dict[str, List[List[str]]] self.lang = lang # type: str diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 35e7be8..e1d6bc1 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -10,7 +10,6 @@ class Translator: - def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] self.l1 = l1 @@ -22,7 +21,7 @@ def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[st self.translation_cmds[(l1, l2)] = parse_mode_file(mode_path) return self.translation_cmds[(l1, l2)] - def _get_pair_or_error(self): # type: (Translator) -> Union[None, Tuple[str, str]] + def _get_pair_or_error(self): # type: (Translator) -> Optional[Tuple[str, str]] try: l1, l2 = map(to_alpha3_code, [self.l1, self.l2]) except ValueError: @@ -33,7 +32,7 @@ def _get_pair_or_error(self): # type: (Translator) -> Union[None, Tuple[str, st return (l1, l2) def _get_format(self, format, deformat, reformat): - # type: (Translator, Union[str, None], Union[str, None], Union[str, None]) -> Tuple[Union[str, None], Union[str, None]] + # type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]] if format: deformat = 'apertium-des' + format reformat = 'apertium-re' + format @@ -45,13 +44,13 @@ def _get_format(self, format, deformat, reformat): return deformat, reformat - def _check_ret_code(self, name, proc): # type: (Translator, str, Popen) -> None + def _check_ret_code(self, proc): # type: (Translator, Popen) -> None if proc.returncode != 0: raise subprocess.CalledProcessError() # type: ignore def _validate_formatters(self, deformat, reformat): - # type: (Translator, Union[str, None], Union[str, None]) -> Tuple[Union[str, object], Union[str, object]] - def valid1(elt, lst): # type: (Union[str, None], List[object]) -> Union[str, object] + # type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]] + def valid1(elt, lst): # type: (Optional[str], List[object]) -> Union[str, object] if elt in lst: return elt else: @@ -78,7 +77,7 @@ def _get_deformat(self, deformat, text): # type: (Translator, str, str) -> str proc_deformat.stdin.write(bytes(text, 'utf-8')) deformatted = proc_deformat.communicate()[0] deformatted = deformatted.decode() - self._check_ret_code('Deformatter', proc_deformat) + self._check_ret_code(proc_deformat) else: deformatted = bytes(text, 'utf-8') res = str(deformatted) @@ -89,13 +88,13 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str proc_reformat = Popen(reformat, stdin=PIPE, stdout=PIPE) proc_reformat.stdin.write(bytes(text, 'utf-8')) result = proc_reformat.communicate()[0] - self._check_ret_code('Reformatter', proc_reformat) + self._check_ret_code(proc_reformat) else: result = re.sub(rb'\0$', b'', text) # type: ignore return result # type: ignore def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): - # type: (Translator, str, bool, Union[str, None], str, str) -> str + # type: (Translator, str, bool, Optional[str], str, str) -> str pair = self._get_pair_or_error() if pair is not None: l1, l2 = pair diff --git a/apertium/utils.py b/apertium/utils.py index 2904550..e25fd47 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -1,11 +1,10 @@ import subprocess import re -import os - if False: from typing import List, Dict, Tuple, Union # noqa: F401 +import apertium # noqa: F401 from apertium.iso639 import iso_639_codes # noqa: F401 @@ -34,25 +33,14 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str def parse_mode_file(mode_path): # type: (str) -> List[List[str]] mode_str = open(mode_path, 'r').read().strip() if mode_str: - if 'ca-oc@aran' in mode_str: - modes_parentdir = os.path.dirname(os.path.dirname(mode_path)) - mode_name = os.path.splitext(os.path.basename(mode_path))[0] - commands = [[ - 'apertium', - '-f', 'html-noent', - # Get the _parent_ dir of the mode file: - '-d', modes_parentdir, - mode_name, - ]] - else: - commands = [] - for cmd in mode_str.strip().split('|'): - # TODO: we should make language pairs install - # modes.xml instead; this is brittle (what if a path - # has | or ' in it?) - cmd = cmd.replace('$2', '').replace('$1', '-g') - cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) - commands.append([c.strip("'") for c in cmd.split()]) + commands = [] + for cmd in mode_str.strip().split('|'): + # TODO: we should make language pairs install + # modes.xml instead; this is brittle (what if a path + # has | or ' in it?) + cmd = cmd.replace('$2', '').replace('$1', '-g') + cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) + commands.append([c.strip("'") for c in cmd.split()]) return commands else: - raise Exception('Could not parse mode file %s', mode_path) + raise apertium.ModeNotInstalled(mode_path) diff --git a/tests/__init__.py b/tests/__init__.py index f4b1d16..389fc1c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,6 @@ class TestAnalyze(unittest.TestCase): - def test_analyzer_en(self): analyzer = apertium.Analyzer('en') lexical_units = analyzer.analyze('cats') @@ -33,7 +32,6 @@ def test_uninstalled_mode(self): class TestGenerate(unittest.TestCase): - def test_generator_single(self): generator = apertium.Generator('en') wordform = generator.generate('^cat$') @@ -72,7 +70,6 @@ def test_uninstalled_mode(self): class TestTranslate(unittest.TestCase): - def test_en_spa(self): translator = apertium.Translator('eng', 'spa') translated = translator.translate('cats') From 87aac5d239a5ae913871bbacdce032554aef3d35 Mon Sep 17 00:00:00 2001 From: vagrant Date: Thu, 21 Jun 2018 22:38:09 +0000 Subject: [PATCH 22/32] inline function error --- apertium/__init__.py | 2 +- apertium/analysis/__init__.py | 11 +++-------- apertium/generation/__init__.py | 12 +++--------- apertium/translation/__init__.py | 18 +++++++++--------- tests/__init__.py | 6 +++++- 5 files changed, 21 insertions(+), 28 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index ffdc9d2..3e3ead0 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,7 +1,7 @@ from apertium.mode_search import search_path from apertium.analysis import Analyzer, analyze # noqa: F401 from apertium.generation import Generator, generate # noqa: F401 -from apertium.translation import Translator # noqa: F401 +from apertium.translation import Translator, translate # noqa: F401 if False: diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 730713e..b6e2e2b 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -39,11 +39,6 @@ def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> L def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] - if lang not in apertium.analyzers: - raise apertium.ModeNotInstalled(lang) - else: - path, mode = apertium.analyzers[lang] - commands = [['apertium', '-d', path, '-f', formatting, mode]] - result = execute(in_text, commands) - lexical_units = list(parse(result)) - return lexical_units + analyzer = Analyzer(lang) + analyzed = analyzer.analyze(in_text, formatting) + return analyzed diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 4ce32d8..36433b9 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -30,12 +30,6 @@ def generate(self, in_text, formatting='none'): # type: (Generator, str, str) - def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] - lang = to_alpha3_code(lang) - - if lang in apertium.generators: - path, mode = apertium.generators[lang] - commands = [['apertium', '-d', path, '-f', formatting, mode]] - result = execute(in_text, commands) - return result - else: - raise apertium.ModeNotInstalled(lang) + generator = Generator(lang) + generated = generator.generate(in_text, formatting) + return generated diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index e1d6bc1..e3d7bb4 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -10,6 +10,8 @@ class Translator: + _get_pair_or_error = lambda self: (map(to_alpha3_code, [self.l1, self.l2])) if '%s-%s' % (map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs else None + def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] self.l1 = l1 @@ -21,15 +23,6 @@ def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[st self.translation_cmds[(l1, l2)] = parse_mode_file(mode_path) return self.translation_cmds[(l1, l2)] - def _get_pair_or_error(self): # type: (Translator) -> Optional[Tuple[str, str]] - try: - l1, l2 = map(to_alpha3_code, [self.l1, self.l2]) - except ValueError: - return None - if '%s-%s' % (l1, l2) not in apertium.pairs: - return None - else: - return (l1, l2) def _get_format(self, format, deformat, reformat): # type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]] @@ -107,3 +100,10 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor return result.decode() # type: ignore else: raise apertium.ModeNotInstalled() + + +def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): + # type: (str, str, str, bool, Optional[str], str, str) -> str + translator = apertium.Translator(l1, l2) + translated = translator.translate(text, mark_unknown, format, deformat, reformat) + return translated diff --git a/tests/__init__.py b/tests/__init__.py index 389fc1c..3de7e36 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -70,7 +70,11 @@ def test_uninstalled_mode(self): class TestTranslate(unittest.TestCase): - def test_en_spa(self): + def test_translator_en_spa(self): translator = apertium.Translator('eng', 'spa') translated = translator.translate('cats') self.assertEqual(translated, 'Gatos') + + def test_en_spa(self): + translated = apertium.translate('eng', 'spa', 'cats') + self.assertEqual(translated, 'Gatos') From 557522f159d74f59c32bda2a3d78b2ff4667bffb Mon Sep 17 00:00:00 2001 From: vagrant Date: Fri, 22 Jun 2018 10:46:20 +0000 Subject: [PATCH 23/32] inline function error fix --- apertium/translation/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index e3d7bb4..190cc41 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -10,12 +10,12 @@ class Translator: - _get_pair_or_error = lambda self: (map(to_alpha3_code, [self.l1, self.l2])) if '%s-%s' % (map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs else None def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] self.l1 = l1 self.l2 = l2 + self.get_pair_or_error = lambda l1, l2: (map(to_alpha3_code, [l1, l2])) if '%s-%s' % tuple(map(to_alpha3_code, [l1, l2])) in apertium.pairs else None def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] if (l1, l2) not in self.translation_cmds: @@ -88,7 +88,7 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (Translator, str, bool, Optional[str], str, str) -> str - pair = self._get_pair_or_error() + pair = self.get_pair_or_error(self.l1, self.l2) if pair is not None: l1, l2 = pair cmds = list(self._get_commands(l1, l2)) From ed2590cfcbfbae31da89a90f382d273cb2275524 Mon Sep 17 00:00:00 2001 From: vagrant Date: Fri, 22 Jun 2018 11:05:14 +0000 Subject: [PATCH 24/32] Error fixes in type annotation --- apertium/translation/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 190cc41..f96e4d4 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -11,11 +11,12 @@ class Translator: + get_pair_or_error = lambda self: (map(to_alpha3_code, [self.l1, self.l2])) if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs else None # type: ignore # noqa: E731 E501 + def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] self.l1 = l1 self.l2 = l2 - self.get_pair_or_error = lambda l1, l2: (map(to_alpha3_code, [l1, l2])) if '%s-%s' % tuple(map(to_alpha3_code, [l1, l2])) in apertium.pairs else None def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] if (l1, l2) not in self.translation_cmds: @@ -23,7 +24,6 @@ def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[st self.translation_cmds[(l1, l2)] = parse_mode_file(mode_path) return self.translation_cmds[(l1, l2)] - def _get_format(self, format, deformat, reformat): # type: (Translator, Optional[str], Optional[str], Optional[str]) -> Tuple[Optional[str], Optional[str]] if format: @@ -88,7 +88,7 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (Translator, str, bool, Optional[str], str, str) -> str - pair = self.get_pair_or_error(self.l1, self.l2) + pair = self.get_pair_or_error() # type: ignore if pair is not None: l1, l2 = pair cmds = list(self._get_commands(l1, l2)) From 8864ca2f488566fed8f8637e6116934c469232bc Mon Sep 17 00:00:00 2001 From: vagrant Date: Sun, 24 Jun 2018 09:13:04 +0000 Subject: [PATCH 25/32] get_pair_or_error function fixed --- apertium/__init__.py | 4 ---- apertium/translation/__init__.py | 9 +++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index 3e3ead0..fc8d119 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -12,10 +12,6 @@ class ModeNotInstalled(ValueError): pass -class ProcessFailure(Exception): - pass - - def update_modes(pair_path): # type: (str) -> None modes = search_path(pair_path) if modes['pair']: diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index f96e4d4..ce766dc 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -10,9 +10,6 @@ class Translator: - - get_pair_or_error = lambda self: (map(to_alpha3_code, [self.l1, self.l2])) if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs else None # type: ignore # noqa: E731 E501 - def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.translation_cmds = {} # type: Dict[Tuple[str, str], List[List[str]]] self.l1 = l1 @@ -88,7 +85,11 @@ def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str def translate(self, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (Translator, str, bool, Optional[str], str, str) -> str - pair = self.get_pair_or_error() # type: ignore + if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs: # type: ignore + pair = map(to_alpha3_code, [self.l1, self.l2]) + else: + pass + if pair is not None: l1, l2 = pair cmds = list(self._get_commands(l1, l2)) From 1bd0be29f417ef5f1579dff8fc81f3f462fdcef9 Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 27 Jun 2018 04:43:56 +0000 Subject: [PATCH 26/32] edit for the changes requested --- .appveyor.yml | 2 +- apertium/analysis/__init__.py | 3 +-- apertium/generation/__init__.py | 3 +-- apertium/translation/__init__.py | 12 ++++-------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index e9f63d3..1c2354f 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -37,7 +37,7 @@ install: build: false test_script: - coverage run -m unittest --verbose --buffer tests - - coverage report --show-missing --fail-under 75 --include 'apertium/**' + - coverage report --show-missing --fail-under 85 --include 'apertium/**' artifacts: - path: dist\* notifications: diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index b6e2e2b..b445b1d 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -40,5 +40,4 @@ def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> L def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] analyzer = Analyzer(lang) - analyzed = analyzer.analyze(in_text, formatting) - return analyzed + return analyzer.analyze(in_text, formatting) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 36433b9..470fbfe 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -31,5 +31,4 @@ def generate(self, in_text, formatting='none'): # type: (Generator, str, str) - def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] generator = Generator(lang) - generated = generator.generate(in_text, formatting) - return generated + return generator.generate(in_text, formatting) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index ce766dc..d7f41b1 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -1,6 +1,5 @@ import re -import subprocess -from subprocess import Popen, PIPE +from subprocess import Popen, PIPE, CalledProcessError if False: from typing import List, Dict, Tuple, Union, Optional, NamedTuple # noqa: F401 @@ -36,7 +35,7 @@ def _get_format(self, format, deformat, reformat): def _check_ret_code(self, proc): # type: (Translator, Popen) -> None if proc.returncode != 0: - raise subprocess.CalledProcessError() # type: ignore + raise CalledProcessError() # type: ignore def _validate_formatters(self, deformat, reformat): # type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]] @@ -88,7 +87,7 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor if '%s-%s' % tuple(map(to_alpha3_code, [self.l1, self.l2])) in apertium.pairs: # type: ignore pair = map(to_alpha3_code, [self.l1, self.l2]) else: - pass + raise apertium.ModeNotInstalled() if pair is not None: l1, l2 = pair @@ -99,12 +98,9 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor output = execute(deformatted, cmds) result = self._get_reformat(str(reformater), output).strip() return result.decode() # type: ignore - else: - raise apertium.ModeNotInstalled() def translate(l1, l2, text, mark_unknown=False, format=None, deformat='txt', reformat='txt'): # type: (str, str, str, bool, Optional[str], str, str) -> str translator = apertium.Translator(l1, l2) - translated = translator.translate(text, mark_unknown, format, deformat, reformat) - return translated + return translator.translate(text, mark_unknown, format, deformat, reformat) From 0ccd937980afe992b1f409e27fc5528171dc45fa Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 2 Jul 2018 10:18:42 +0000 Subject: [PATCH 27/32] imporoved repr --- apertium/__init__.py | 16 ++++++++-------- apertium/analysis/__init__.py | 6 ++++++ apertium/generation/__init__.py | 24 +++++++++++++++--------- apertium/translation/__init__.py | 3 +++ tests/__init__.py | 3 +-- 5 files changed, 33 insertions(+), 19 deletions(-) diff --git a/apertium/__init__.py b/apertium/__init__.py index fc8d119..4472b8d 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -12,8 +12,8 @@ class ModeNotInstalled(ValueError): pass -def update_modes(pair_path): # type: (str) -> None - modes = search_path(pair_path) +def update_modes(path): # type: (str) -> None + modes = search_path(path) if modes['pair']: for path, lang_src, lang_trg in modes['pair']: pairs['%s-%s' % (lang_src, lang_trg)] = path @@ -25,14 +25,14 @@ def update_modes(pair_path): # type: (str) -> None generators[lang_pair] = (dirpath, modename) -def append_pair_path(pair_path): # type: (str) -> None - pair_paths.append(pair_path) - update_modes(pair_path) +def append_path(path): # type: (str) -> None + paths.append(path) + update_modes(path) -pair_paths = ['/usr/share/apertium', '/usr/local/share/apertium'] +paths = ['/usr/share/apertium', '/usr/local/share/apertium'] analyzers = {} # type: Dict[str, Tuple[str, str]] generators = {} # type: Dict[str, Tuple[str, str]] pairs = {} # type: Dict[str, str] -for pair_path in pair_paths: - update_modes(pair_path) +for path in paths: + update_modes(path) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index b445b1d..ee02259 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -16,6 +16,12 @@ def __init__(self, lang): # type: (Analyzer, str) -> None else: self.path, self.mode = apertium.analyzers[self.lang] + def __repr__(self): # type: (Analyzer) -> str + return 'Analyzer(lang=%s)' % self.lang + + def __str__(self): # type: (Analyzer) -> str + return ' List[List[str]] if self.lang not in self.analyzer_cmds: mode_path, mode = apertium.analyzers[self.lang] diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 470fbfe..ed8197f 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -10,7 +10,19 @@ class Generator: def __init__(self, lang): # type: (Generator, str) -> None self.generator_cmds = {} # type: Dict[str, List[List[str]]] - self.lang = lang # type: str + self.lang = to_alpha3_code(lang) # type: str + if self.lang in apertium.generators: + self.path, self.mode = apertium.generators[self.lang] + self.commands = list(self._get_commands()) + else: + raise apertium.ModeNotInstalled(self.lang) + + def __repr__(self): # type: (Analyzer) -> str + return 'Generator(lang=%s)' % self.lang + + def __str__(self): # type: (Analyzer) -> str + + return '' % self.mode def _get_commands(self): # type: (Generator) -> List[List[str]] if self.lang not in self.generator_cmds: @@ -19,14 +31,8 @@ def _get_commands(self): # type: (Generator) -> List[List[str]] return self.generator_cmds[self.lang] def generate(self, in_text, formatting='none'): # type: (Generator, str, str) -> Union[str, List[str]] - self.lang = to_alpha3_code(self.lang) - - if self.lang in apertium.generators: - commands = list(self._get_commands()) - result = execute(in_text, commands) - return result.rstrip('\x00') - else: - raise apertium.ModeNotInstalled(self.lang) + result = execute(in_text, self.commands) + return result.rstrip('\x00') def generate(lang, in_text, formatting='none'): # type: (str, str, str) -> Union[str, List[str]] diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index d7f41b1..4295873 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -14,6 +14,9 @@ def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.l1 = l1 self.l2 = l2 + def __repr__(self): + return 'Translator(pair=%s-%s)' % (self.l1, self.l2) + def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] if (l1, l2) not in self.translation_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] diff --git a/tests/__init__.py b/tests/__init__.py index 3de7e36..0390b15 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -48,9 +48,8 @@ def test_generator_bare(self): self.assertEqual(lexical_units, 'cat') def test_generator_uninstalled_mode(self): - generator = apertium.Generator('spa') with self.assertRaises(apertium.ModeNotInstalled): - generator.generate('cat') + generator = apertium.Generator('spa') def test_single(self): wordform = apertium.generate('en', '^cat$') From 5d8c21ac9bde212e30eb69d8503d0e6a7013a2d3 Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 2 Jul 2018 10:20:29 +0000 Subject: [PATCH 28/32] minor change --- apertium/analysis/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index ee02259..3bd4b87 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -20,7 +20,7 @@ def __repr__(self): # type: (Analyzer) -> str return 'Analyzer(lang=%s)' % self.lang def __str__(self): # type: (Analyzer) -> str - return '' % self.mode def _get_commands(self): # type: (Analyzer) -> List[List[str]] if self.lang not in self.analyzer_cmds: From 36c0b03a0ea957ab48dfc0f43e115a0884f4673f Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 2 Jul 2018 10:25:41 +0000 Subject: [PATCH 29/32] Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 95b1c4b..c574aa5 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,16 @@ In [2]: apertium.append_pair_path('..') ### Translation Performing Translations +Method 1: ```python In [1]: import apertium In [2]: t = apertium.Translator('eng', 'spa') In [3]: t.translate('cats') Out[3]: 'Gatos' ``` +Method 2: +```python +In [1]: import apertium +In [2]: apertium.translate('eng', 'spa', 'I love you') +Out[2]: 'Te quieres' +``` \ No newline at end of file From 646b83172111f89eee5319c61cc57ff419000fa1 Mon Sep 17 00:00:00 2001 From: vagrant Date: Sun, 8 Jul 2018 09:56:06 +0000 Subject: [PATCH 30/32] improved repr --- apertium/generation/__init__.py | 6 +++--- apertium/translation/__init__.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index ed8197f..d5016af 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -12,15 +12,15 @@ def __init__(self, lang): # type: (Generator, str) -> None self.generator_cmds = {} # type: Dict[str, List[List[str]]] self.lang = to_alpha3_code(lang) # type: str if self.lang in apertium.generators: - self.path, self.mode = apertium.generators[self.lang] + self.path, self.mode = apertium.generators[self.lang] self.commands = list(self._get_commands()) else: raise apertium.ModeNotInstalled(self.lang) - def __repr__(self): # type: (Analyzer) -> str + def __repr__(self): # type: (Generator) -> str return 'Generator(lang=%s)' % self.lang - def __str__(self): # type: (Analyzer) -> str + def __str__(self): # type: (Generator) -> str return '' % self.mode diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 4295873..e60bb30 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -14,9 +14,12 @@ def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.l1 = l1 self.l2 = l2 - def __repr__(self): + def __repr__(self): # type: (Translator) -> str return 'Translator(pair=%s-%s)' % (self.l1, self.l2) + def __str__(self): # type: (Translator) -> str + return '' % apertium.pairs['%s-%s' % (self.l1, self.l2)].split('/')[-1] + def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] if (l1, l2) not in self.translation_cmds: mode_path = apertium.pairs['%s-%s' % (l1, l2)] @@ -91,7 +94,6 @@ def translate(self, text, mark_unknown=False, format=None, deformat='txt', refor pair = map(to_alpha3_code, [self.l1, self.l2]) else: raise apertium.ModeNotInstalled() - if pair is not None: l1, l2 = pair cmds = list(self._get_commands(l1, l2)) From ce3838fff3750e72c7bb7589fb536169bd8a2d37 Mon Sep 17 00:00:00 2001 From: vagrant Date: Mon, 9 Jul 2018 09:18:22 +0000 Subject: [PATCH 31/32] Tests for improved repr added --- README.md | 6 ------ tests/__init__.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 609ee7a..0c0e906 100644 --- a/README.md +++ b/README.md @@ -59,22 +59,16 @@ In [2]: apertium.append_pair_path('..') ### Translation Performing Translations -<<<<<<< HEAD Method 1: -======= ->>>>>>> 674c1cfdf645ac6cafe4be3277e20d567a43b23d ```python In [1]: import apertium In [2]: t = apertium.Translator('eng', 'spa') In [3]: t.translate('cats') Out[3]: 'Gatos' ``` -<<<<<<< HEAD Method 2: ```python In [1]: import apertium In [2]: apertium.translate('eng', 'spa', 'I love you') Out[2]: 'Te quieres' ``` -======= ->>>>>>> 674c1cfdf645ac6cafe4be3277e20d567a43b23d diff --git a/tests/__init__.py b/tests/__init__.py index 0390b15..59b2a60 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -30,6 +30,14 @@ def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): analyzer = apertium.Analyzer('spa') + def test_repr(self): + analyzer = apertium.Analyzer('en') + self.assertEqual(repr(analyzer), 'Analyzer(lang=eng)') + + def test_str(self): + analyzer = apertium.Analyzer('en') + self.assertEqual(str(analyzer), '') + class TestGenerate(unittest.TestCase): def test_generator_single(self): @@ -67,6 +75,14 @@ def test_uninstalled_mode(self): with self.assertRaises(apertium.ModeNotInstalled): apertium.generate('spa', 'cat') + def test_repr(self): + generator = apertium.Generator('eng') + self.assertEqual(repr(generator), 'Generator(lang=eng)') + + def test_str(self): + generator = apertium.Generator('eng') + self.assertEqual(str(generator), '') + class TestTranslate(unittest.TestCase): def test_translator_en_spa(self): @@ -77,3 +93,11 @@ def test_translator_en_spa(self): def test_en_spa(self): translated = apertium.translate('eng', 'spa', 'cats') self.assertEqual(translated, 'Gatos') + + def test_repr(self): + translator = apertium.Translator('eng', 'spa') + self.assertEqual(repr(translator), 'Translator(pair=eng-spa)') + + def test_str(self): + translator = apertium.Translator('eng', 'spa') + self.assertEqual(str(translator), '') From d664cbae07108b39b89da445cff34349a0382dc3 Mon Sep 17 00:00:00 2001 From: vagrant Date: Wed, 11 Jul 2018 14:04:26 +0000 Subject: [PATCH 32/32] repr fix --- apertium/analysis/__init__.py | 10 ++++++++-- apertium/generation/__init__.py | 10 ++++++++-- apertium/translation/__init__.py | 10 ++++++++-- tests/__init__.py | 12 ++++++------ 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 3bd4b87..78a9885 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -17,10 +17,16 @@ def __init__(self, lang): # type: (Analyzer, str) -> None self.path, self.mode = apertium.analyzers[self.lang] def __repr__(self): # type: (Analyzer) -> str - return 'Analyzer(lang=%s)' % self.lang + """ + returns the representation of this Analyzer class object + """ + return "Analyzer(lang='%s')" % self.lang def __str__(self): # type: (Analyzer) -> str - return '' % self.mode + """ + returns the printable str representation of the object + """ + return "" % self.mode def _get_commands(self): # type: (Analyzer) -> List[List[str]] if self.lang not in self.analyzer_cmds: diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index da8d016..65dbf1c 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -18,10 +18,16 @@ def __init__(self, lang): # type: (Generator, str) -> None raise apertium.ModeNotInstalled(self.lang) def __repr__(self): # type: (Generator) -> str - return 'Generator(lang=%s)' % self.lang + """ + returns the representation of this Generator class object + """ + return "Generator(lang='%s')" % self.lang def __str__(self): # type: (Generator) -> str - return '' % self.mode + """ + returns the printable str representation of the object + """ + return "" % self.mode def _get_commands(self): # type: (Generator) -> List[List[str]] if self.lang not in self.generator_cmds: diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index 396a619..e3f1013 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -15,10 +15,16 @@ def __init__(self, l1, l2): # type: (Translator, str, str) -> None self.l2 = l2 def __repr__(self): # type: (Translator) -> str - return 'Translator(pair=%s-%s)' % (self.l1, self.l2) + """ + returns the representation of this Translator class object + """ + return "Translator(pair='%s-%s')" % (self.l1, self.l2) def __str__(self): # type: (Translator) -> str - return '' % apertium.pairs['%s-%s' % (self.l1, self.l2)].split('/')[-1] + """ + returns the printable str representation of the Translator object + """ + return "" % apertium.pairs['%s-%s' % (self.l1, self.l2)].split('/')[-1] def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] if (l1, l2) not in self.translation_cmds: diff --git a/tests/__init__.py b/tests/__init__.py index 59b2a60..fa8df87 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -32,11 +32,11 @@ def test_uninstalled_mode(self): def test_repr(self): analyzer = apertium.Analyzer('en') - self.assertEqual(repr(analyzer), 'Analyzer(lang=eng)') + self.assertEqual(repr(analyzer), "Analyzer(lang='eng')") def test_str(self): analyzer = apertium.Analyzer('en') - self.assertEqual(str(analyzer), '') + self.assertEqual(str(analyzer), "") class TestGenerate(unittest.TestCase): @@ -77,11 +77,11 @@ def test_uninstalled_mode(self): def test_repr(self): generator = apertium.Generator('eng') - self.assertEqual(repr(generator), 'Generator(lang=eng)') + self.assertEqual(repr(generator), "Generator(lang='eng')") def test_str(self): generator = apertium.Generator('eng') - self.assertEqual(str(generator), '') + self.assertEqual(str(generator), "") class TestTranslate(unittest.TestCase): @@ -96,8 +96,8 @@ def test_en_spa(self): def test_repr(self): translator = apertium.Translator('eng', 'spa') - self.assertEqual(repr(translator), 'Translator(pair=eng-spa)') + self.assertEqual(repr(translator), "Translator(pair='eng-spa')") def test_str(self): translator = apertium.Translator('eng', 'spa') - self.assertEqual(str(translator), '') + self.assertEqual(str(translator), "")