Skip to content

Commit 82d0498

Browse files
committed
v0.2.7b1
1 parent a8b1f65 commit 82d0498

File tree

3 files changed

+103
-32
lines changed

3 files changed

+103
-32
lines changed

translator/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from translator.translate import Translator
22
from translator.language import get_nllb_lang
33

4-
__version__ = "0.2.6b4"
4+
__version__ = "0.2.7b1"
55

66
LANGS = get_nllb_lang()

translator/language.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import os
22
from langcodes import closest_supported_match
3+
from langcodes.tag_parser import LanguageTagError
34

45
_LANGS = [
56
"ace_Arab", "ace_Latn", "acm_Arab", "acq_Arab", "aeb_Arab", "afr_Latn", "ajp_Arab", "aka_Latn", "amh_Ethi", "apc_Arab", "arb_Arab",
@@ -27,7 +28,10 @@ def get_nllb_lang(lang = None):
2728
if not lang:
2829
return _LANGS
2930
else:
30-
return closest_supported_match(lang, _LANGS)
31+
try:
32+
return closest_supported_match(lang, _LANGS)
33+
except LanguageTagError as lte:
34+
return lang
3135

3236
def get_sys_lang_format():
3337
i18n = os.environ.get('LANG', "en_EN.UTF-8").split(".")[0]

translator/main.py

Lines changed: 97 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@
1717
def parse_arguments():
1818
argument_parse = ArgumentParser(description="Translate [FROM one language] [TO another], [any SENTENCE you would like].")
1919
argument_parse.add_argument('-v', '--version', action='store_true', help="shows the current version of translator")
20-
argument_parse.add_argument('_from', nargs='?', default=["eng_Latn"], help="Source language to translate from.")
21-
argument_parse.add_argument('_to', nargs='?', default=[get_sys_lang_format()], help="Target language to translate towards.")
22-
argument_parse.add_argument('sentences', nargs="*", default=["Translator version:"], help="Sentences to translate.")
20+
argument_parse.add_argument('_from', nargs='?', default=[], help="Source language to translate from.")
21+
argument_parse.add_argument('_to', nargs='?', default=[], help="Target language to translate towards.")
22+
argument_parse.add_argument('sentences', nargs="*", default=[], help="Sentences to translate.")
2323
argument_parse.add_argument('-d', '--directory', type=str, help="Path to directory to translate in batch instead of unique sentence.")
2424
argument_parse.add_argument('-S', '--save', type=str, help="Path to text file to save translations.")
2525
argument_parse.add_argument('-l', '--max_length', default=500, help="Max length of output.")
@@ -40,35 +40,100 @@ def main():
4040

4141
spinner = Halo(spinner="dots12")
4242

43-
if args.language_list:
44-
print("Language list:")
45-
for l in get_nllb_lang():
46-
print(f"- {l}")
47-
print()
48-
sys.exit(0)
43+
if args.version:
44+
_version = "Translator version:"
45+
_lang = "eng_Latn"
4946

50-
_from, _to = "".join(args._from), "".join(args._to)
47+
_to = args._to or get_sys_lang_format()
5148

52-
if args.version:
53-
if _from == _to == "eng_Latn":
54-
print(f"Translator version: {__version__}")
49+
if _to == _lang:
50+
spinner.info(f"{_version} {__version__}")
5551
else:
5652
spinner.start()
57-
translator = Translator(_from, _to, args.max_length, args.model_id, args.pipeline)
58-
version = translate_sentence(args.sentence, translator)
53+
translator = Translator(_lang, _to, args.max_length, args.model_id, args.pipeline)
54+
version = translate_sentence(_version, translator)
5955
spinner.stop()
60-
print(version[0], " ", __version__)
56+
spinner.info(f"{version[0]} {__version__}")
57+
sys.exit(0)
58+
59+
if args.language_list:
60+
spinner.info("Language list:")
61+
if args.model_id == "facebook/nllb-200-distilled-600M":
62+
for l in get_nllb_lang(): print(f"- {l}")
63+
else:
64+
raise NotImplementedError(f"{model_id=} language list not implemented.")
65+
print()
6166
sys.exit(0)
6267

68+
_from, _to, _sentences = "".join(args._from), "".join(args._to), args.sentences
69+
70+
if _from and _to and not _sentences:
71+
if _to not in get_nllb_lang() and _to == get_nllb_lang(_to):
72+
_sentences = [args._to]
73+
_to = get_sys_lang_format()
74+
spinner.info(f"Target language was not provided. Translating to \'{_to}\'.")
75+
elif not args.directory:
76+
spinner.fail(f"Missing sentences to translate.")
77+
sys.exit(1)
78+
79+
if not _to:
80+
if not args.directory:
81+
spinner.fail(f"Missing \'_to\' argument.")
82+
print("Please choose a target language or at least give a sentence or a directory to translate.")
83+
print("Type \'translate --help\' to get help.")
84+
sys.exit(1)
85+
else:
86+
_to = get_sys_lang_format()
87+
spinner.info(f"Target language was not provided. Translating to \'{_to}\'.")
88+
89+
if not _from:
90+
spinner.fail(f"Missing \'_from\' argument.")
91+
print("Please provide at least a source language.")
92+
sys.exit(1)
93+
6394
for _lang in [_from, _to]:
6495
if _lang not in get_nllb_lang() and args.model_id == "facebook/nllb-200-distilled-600M":
65-
print(f"Warning! {_lang=} is not in listed as supported by the current model.")
66-
print("There is a high probability translation operations will fail.")
96+
spinner.warn(f"Warning! {_lang} is not listed as supported language by the current model {args.model_id}.")
97+
print("There is a high probability translation will fail.")
6798
print("Type translate --language_list to get the full list of supported languages.")
68-
print("Or type translate --help to get help.")
99+
print("Or type \'translate --help\' to get help.")
69100
_nllb_lang = get_nllb_lang(_lang)
70-
print(f"Using {_nllb_lang} instead of {_lang}.")
71-
101+
if _lang == _from:
102+
_from = _nllb_lang
103+
elif _lang == _to:
104+
_to = _nllb_lang
105+
spinner.info(f"Using {_nllb_lang} instead of {_lang}.")
106+
107+
if _from == _to:
108+
spinner.warn(f"Warning! {_from=} == {_to=} ")
109+
print("Translating to the same language is computationally wasteful for no valid reason.")
110+
spinner.info("Using Hitchens's razor to shortcut translation.")
111+
if not args.directory:
112+
if not args.save:
113+
for sentence in _sentences: print(sentence)
114+
else:
115+
if not Path(args.save).exists():
116+
utils.save_txt(_sentences, Path(args.save))
117+
else:
118+
print(f"{args.save} exists already.")
119+
print("Please mind the following fact:")
120+
print("Translated sentences will be added at the end of the file.")
121+
utils.save_txt(_sentences, Path(args.save), append=True)
122+
else:
123+
txt_files = list(set(utils.glob_files_from_dir(args.directory, suffix=".txt")) - set([args.save, f"{args.directory}/{args.save}"]) - set(utils.glob_files_from_dir(f"{args.save.replace('.txt', f'.{_from}.{_to}.tmp.cache')}", suffix="*")))
124+
if not txt_files:
125+
spinner.fail(f"No files to translate in \'{args.directory}\'.")
126+
sys.exit(1)
127+
if args.save:
128+
with open(args.save, 'w') as outfile:
129+
for fname in txt_files:
130+
with open(fname) as infile:
131+
for line in infile:
132+
outfile.write(line)
133+
else:
134+
for fname in txt_files:
135+
with open(fname) as infile: print(infile.read())
136+
sys.exit(0)
72137

73138
spinner.info("Preparing to translate...")
74139
spinner.start()
@@ -106,7 +171,7 @@ def main():
106171
txt_files = list(set(utils.glob_files_from_dir(source_path, suffix=".txt")) - set([output_path, f"{source_path}/{output_path}"]) - set(utils.glob_files_from_dir(cache, suffix="*")))
107172
_l = len(txt_files)
108173
if _l == 0:
109-
spinner.info("No files to translate.")
174+
spinner.fail(f"No files to translate in \'{source_path}\'.")
110175
sys.exit(1)
111176
spinner.info(f"Found {_l} text file{'s' if _l > 1 else ''}.")
112177
spinner.stop()
@@ -177,13 +242,13 @@ def main():
177242
time_after_3 = time.perf_counter()
178243
_td_3 = time_after_3 - time_before_3
179244
spinner.text = ""
180-
spinner.info("Translation completed.")
245+
spinner.succeed("Translation completed.")
181246
spinner.info(f"Took {_td_3:.1f} second(s) to translate {_ut_ds:n} sentences.")
182247

183248
# Report translation
184249
time_after = time.perf_counter()
185250
_td = time_after - time_before
186-
spinner.info(f"All files in {args.directory} have been translated from {_from} to {_to}.")
251+
spinner.succeed(f"All files in {args.directory} have been translated from {_from} to {_to}.")
187252
_sgb = _ut_ds >> 30
188253
if _sgb > 0:
189254
spinner.info(f"Took {_td:.1f} second(s) to translate over {_sgb} GB (~ {float(_ut_ds >> 27)/_td:.1f} Gb/s).")
@@ -202,25 +267,27 @@ def main():
202267
p.parent.mkdir(parents=True, exist_ok=True)
203268
utils.save_txt(_translated, p)
204269
utils.save_txt(translations, Path(output_path))
205-
spinner.info("Partial translation has been saved.")
270+
spinner.succeed(f"Partial translation has been saved under {output_path}.")
206271
sys.exit(1)
207272
else:
208-
translation = translate_sentence(args.sentences, translator)
273+
translation = translate_sentence(_sentences, translator)
209274
for t in translation: print(t)
210275
translations.append(translation)
211276

212277
if args.save:
213278
if not Path(args.save).exists():
214279
utils.save_txt(translations, Path(args.save))
215280
else:
216-
print(f"{args.save} exists already.")
217-
print("Please mind the following fact:")
218-
print("Translated sentences will be added at the end of the file.")
281+
spinner.warn(f"{args.save} exists already.")
282+
spinner.info("Translated sentences will be added at the end of the file.")
219283
utils.save_txt(translations, Path(args.save), append=True)
220284

221285
if __name__ == "__main__":
222286
try:
223287
main()
224288
sys.exit(0)
289+
except NotImplementedError as e:
290+
print(str(e))
291+
sys.exit(2)
225292
except KeyboardInterrupt:
226-
sys.exit(1)
293+
sys.exit(1)

0 commit comments

Comments
 (0)