Skip to content

Commit 446ae83

Browse files
committed
Release #302 and #313.
2 parents 6928782 + 541ff72 commit 446ae83

File tree

7 files changed

+82
-18
lines changed

7 files changed

+82
-18
lines changed

web_monitoring/diffing_server.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,16 +307,37 @@ def _extract_encoding(headers, content):
307307
prolog_match = XML_PROLOG_PATTERN.search(content, endpos=2048)
308308
if prolog_match:
309309
encoding = prolog_match.group(1).decode('ascii', errors='ignore')
310+
# Handle common mistakes and errors in encoding names
311+
if encoding == 'iso-8559-1':
312+
encoding = 'iso-8859-1'
313+
# Windows-1252 is so commonly mislabeled as ISO-8859-1 that WHATWG recommends
314+
# treating that label as Windows-1252: https://encoding.spec.whatwg.org/#names-and-labels
315+
if encoding == 'iso-8859-1' and 'html' in content_type:
316+
encoding = 'windows-1252'
310317
return encoding
311318

312319

313320
def _decode_body(response, name, raise_if_binary=True):
314321
encoding = _extract_encoding(response.headers, response.body) or 'UTF-8'
315-
text = response.body.decode(encoding, errors='replace')
322+
try:
323+
text = response.body.decode(encoding, errors='replace')
324+
except LookupError:
325+
# If the encoding we found isn't known, fall back to ascii
326+
text = response.body.decode('ascii', errors='replace')
327+
328+
text_length = len(text)
329+
if text_length == 0:
330+
return text
331+
332+
# Replace null characters (U+0000) with U+FFFD; some differs (especially those
333+
# written in C) don't handle them well in the middle of a string.
334+
text = text.replace('\u0000', '\ufffd')
335+
316336
# If a significantly large portion of the document was totally undecodable,
317337
# it's likely this wasn't text at all, but binary data.
318-
if raise_if_binary and text.count('\ufffd') / len(text) > 0.25:
338+
if raise_if_binary and text.count('\ufffd') / text_length > 0.25:
319339
raise UndecodableContentError(f'The response body of `{name}` could not be decoded as {encoding}.')
340+
320341
return text
321342

322343

web_monitoring/html_diff_render.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,13 @@ def html_diff_render(a_text, b_text, a_headers=None, b_headers=None,
299299

300300
color_palette = get_color_palette()
301301
change_styles.string = f'''
302-
ins, ins > * {{text-decoration: none; background-color:
303-
{color_palette['differ_insertion']};}}
304-
del, del > * {{text-decoration: none; background-color:
305-
{color_palette['differ_deletion']};}}'''
302+
ins.wm-diff, ins.wm-diff > * {{background-color:
303+
{color_palette['differ_insertion']} !important;
304+
all: unset;}}
305+
del.wm-diff, del.wm-diff > * {{background-color:
306+
{color_palette['differ_deletion']} !important;
307+
all: unset;}}
308+
script {{display: none !important;}}'''
306309
soup.head.append(change_styles)
307310

308311
soup.body.replace_with(diff_body)
@@ -351,9 +354,9 @@ def _html_for_dmp_operation(operation):
351354
"Convert a diff-match-patch operation to an HTML string."
352355
html_value = html.escape(operation[1])
353356
if operation[0] == -1:
354-
return f'<del>{html_value}</del>'
357+
return f'<del class="wm-diff">{html_value}</del>'
355358
elif operation[0] == 1:
356-
return f'<ins>{html_value}</ins>'
359+
return f'<ins class="wm-diff">{html_value}</ins>'
357360
else:
358361
return html_value
359362

@@ -1118,7 +1121,7 @@ def merge_changes(change_chunks, doc, tag_type='ins'):
11181121

11191122
doc.append(f'</{tag_type}>')
11201123
doc.append(chunk)
1121-
doc.append(f'<{tag_type}>')
1124+
doc.append(f'<{tag_type} class="wm-diff">')
11221125

11231126
# other side of the malformed document case from above
11241127
current_content.reverse()
@@ -1144,7 +1147,7 @@ def merge_changes(change_chunks, doc, tag_type='ins'):
11441147
inline_tag_name = name
11451148

11461149
if depth == 0:
1147-
doc.append(f'<{tag_type}>')
1150+
doc.append(f'<{tag_type} class="wm-diff">')
11481151
depth += 1
11491152
current_content = []
11501153

@@ -1389,7 +1392,7 @@ def merge_change_groups(change_chunks, doc, tag_type=None):
13891392
# doc.append(group)
13901393
# <end> not sure if we should break the group
13911394
if tag_type:
1392-
group.append(f'<{tag_type}>')
1395+
group.append(f'<{tag_type} class="wm-diff">')
13931396

13941397
# other side of the malformed document case from above
13951398
current_content.reverse()
@@ -1420,7 +1423,7 @@ def merge_change_groups(change_chunks, doc, tag_type=None):
14201423
group = []
14211424
doc.append(group)
14221425
if tag_type:
1423-
group.append(f'<{tag_type}>')
1426+
group.append(f'<{tag_type} class="wm-diff">')
14241427
depth += 1
14251428
current_content = []
14261429

@@ -1582,7 +1585,7 @@ def tag_info(token):
15821585
# if we have a hanging delete buffer (with content, not just HTML
15831586
# DOM structure), clean it up and insert it before moving on.
15841587
# FIXME: this should not look explicitly for `<del>`
1585-
if '<del>' in delete_buffer:
1588+
if '<del class="wm-diff">' in delete_buffer:
15861589
for tag in delete_tag_stack:
15871590
delete_buffer.append(f'</{tag[0]}>')
15881591
document.extend(delete_buffer)
@@ -1627,7 +1630,7 @@ def tag_info(token):
16271630
# Add any hanging buffer of deletes that never got completed, but only if
16281631
# it has salient changes in it.
16291632
# FIXME: this should not look explicitly for `<del>`
1630-
if '<del>' in delete_buffer:
1633+
if '<del class="wm-diff">' in delete_buffer:
16311634
for tag in delete_tag_stack:
16321635
delete_buffer.append(f'</{tag[0]}>')
16331636
document.extend(delete_buffer)

web_monitoring/links_diff.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,11 @@ def links_diff_html(a_text, b_text, a_headers=None, b_headers=None,
125125
background-color: {color_palette['differ_insertion']};}}
126126
[wm-deleted] > td {{
127127
background-color: {color_palette['differ_deletion']};}}
128-
ins {{ text-decoration: none;
129-
background-color: {color_palette['differ_insertion']};}}
130-
del {{ text-decoration: none;
131-
background-color: {color_palette['differ_deletion']};}}"""
128+
ins.wm-diff {{ background-color: {color_palette['differ_insertion']}
129+
!important; all: unset;}}
130+
del.wm-diff {{ background-color: {color_palette['differ_deletion']}
131+
!important; all: unset;}}
132+
script {{display: none !important;}}"""
132133

133134
soup.head.append(change_styles)
134135
soup.title.string = get_title(diff['b_parsed'])

web_monitoring/tests/fixtures/empty.txt

Whitespace-only changes.
27 Bytes
Binary file not shown.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
2+
<html>
3+
<head>
4+
<meta http-equiv="refresh" content="60; url=http://radar2pub.bldr.ncep.noaa.gov">
5+
<meta http-equiv="Content-Type" content="text/html; charset=who-knows-what-i-am">
6+
<meta name="Author" content="David S Alden (GMSI)">
7+
<title>NWS Level II Radar Receive Status</title>
8+
<link rel="stylesheet" type="text/css" href="mon.css">
9+
</head>
10+
<body>
11+
<font face="arial,helvetica" size=+2><b>NWS Level II Radar Receive Status as of Tue Jan 17 14:14:06 UTC 2017</b></font><br>
12+
<br>Key: Green=Up (Lvl2&lt;5 min); Yellow=Warning (5&lt;=Lvl2&lt;30 min); Orange=Down (Lvl2&amp;Lvl3&gt;10 min); Red=Down (Lvl2&gt;=30 min)<br>
13+
SiteID: Black=Ok (-1&lt;=Latency&lt;=60 sec); White=Anomaly (-1&gt;Latency&gt;60 sec)<br>
14+
SiteCodes: 01=Legacy Msg1; 02=Legacy Msg31; 03=Super-Res; 04=Recombined; 05=DP w/o SuperRes; 06=DP w Super Res; 07=DP Recombined<br><br>
15+
<p><b><font face="arial,helvetica">Eastern Region Radar Sites - Last receipt of data</font></b>
16+
<table BORDER=0 CELLSPACING=4><tr>
17+
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Wakefield VA" VALIGN=middle><b><A HREF="site/kakq.html" TARGET="_blank">KAKQ</a></b><span class=black>06<br>14:13:02</span></td>
18+
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Binghamton NY" VALIGN=middle><b><A HREF="site/kbgm.html" TARGET="_blank">KBGM</a></b><span class=black>06<br>14:13:03</span></td>
19+
<td ALIGN=CENTER BGCOLOR="#FF0000" class=black id=blacklink TITLE="Boston MA" VALIGN=middle><b><A HREF="site/kbox.html" TARGET="_blank">KBOX</a></b><span class=black>06<br>13:12:47</span></td>
20+
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Buffalo NY" VALIGN=middle><b><A HREF="site/kbuf.html" TARGET="_blank">KBUF</a></b><span class=black>06<br>14:13:04</span></td>
21+
</tr></table>
22+
<font face="Arial,Helvetica">44 sites up (97.8%) of 45 radar sites monitored</font>
23+
<p><font face="arial,helvetica"> 151 sites up (96%) of 158 total radar sites monitored</font>
24+
</body>
25+
</html>

web_monitoring/tests/test_diffing_server_exc_handling.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,10 @@ def test_cors_origin_header(self):
188188
'Origin': 'http://two.com'})
189189
assert response.headers.get('Access-Control-Allow-Origin') == 'http://two.com'
190190

191+
def test_decode_empty_bodies(self):
192+
response = mock_tornado_request('empty.txt')
193+
df._decode_body(response, 'a')
194+
191195
def test_poorly_encoded_content(self):
192196
response = mock_tornado_request('poorly_encoded_utf8.txt')
193197
df._decode_body(response, 'a')
@@ -204,6 +208,16 @@ def test_fetch_undecodable_content(self):
204208
self.json_check(response)
205209
assert response.code == 422
206210

211+
def test_treats_unknown_encoding_as_ascii(self):
212+
response = mock_tornado_request('unknown_encoding.html')
213+
df._decode_body(response, 'a')
214+
215+
def test_diff_content_with_null_bytes(self):
216+
response = self.fetch('/html_source_dmp?format=json&'
217+
f'a=file://{fixture_path("has_null_byte.txt")}&'
218+
f'b=file://{fixture_path("has_null_byte.txt")}')
219+
assert response.code == 200
220+
207221

208222
def mock_diffing_method(c_body):
209223
return

0 commit comments

Comments
 (0)