Skip to content

Commit 6261ae9

Browse files
animalize authored and zooba committed
bpo-32174: Let .chm document display non-ASCII characters properly (GH-9758)
Let .chm document display non-ASCII characters properly Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual effect on some MBCS Windows systems.
1 parent 60d230c commit 6261ae9

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

Doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# ---------------------
1515

1616
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
17-
'pyspecific', 'c_annotations']
17+
'pyspecific', 'c_annotations', 'escape4chm']
1818

1919
# General substitutions.
2020
project = 'Python'

Doc/tools/extensions/escape4chm.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
3+
effect on some MBCS Windows systems.
4+
5+
https://bugs.python.org/issue32174
6+
"""
7+
8+
import re
9+
from html.entities import codepoint2name
10+
11+
# escape the characters whose codepoint is > 0x7F
12+
def _process(string):
13+
def escape(matchobj):
14+
codepoint = ord(matchobj.group(0))
15+
16+
name = codepoint2name.get(codepoint)
17+
if name is None:
18+
return '&#%d;' % codepoint
19+
else:
20+
return '&%s;' % name
21+
22+
return re.sub(r'[^\x00-\x7F]', escape, string)
23+
24+
def escape_for_chm(app, pagename, templatename, context, doctree):
    """Escape the page body to 7-bit ASCII when building .chm output.

    Connected to the ``html-page-context`` event.  When the active builder
    is named ``htmlhelp`` and *context* has a ``body`` entry, that entry is
    replaced in place by its escaped form.  For any other builder, or when
    there is no body, *context* is left unchanged.  *pagename*,
    *templatename* and *doctree* are accepted as part of the handler
    signature but are not used.
    """
    # only works for .chm output; a builder without a `name` is skipped too
    if getattr(app.builder, 'name', None) != 'htmlhelp':
        return

    # escape the `body` part to 7-bit ASCII
    body = context.get('body')
    if body is not None:
        context['body'] = _process(body)
33+
34+
def setup(app):
    """Register this extension's event handler with the Sphinx *app*.

    Returns the extension metadata dictionary (version and parallel-read
    safety flag).
    """
    # `html-page-context` event emitted when the HTML builder has
    # created a context dictionary to render a template with.
    app.connect('html-page-context', escape_for_chm)

    return {'version': '1.0', 'parallel_read_safe': True}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
chm document displays non-ASCII characters properly on some MBCS Windows
2+
systems.

0 commit comments

Comments
 (0)