Style tweaks

acabal · acabal · commit 64324b3cd48a · 2017-12-10T17:05:40.000-06:00
diff --git a/clean b/clean
@@ -16,14 +16,14 @@ def main():
 	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML or SVG file, or a directory containing XHTML or SVG files")
 	args = parser.parse_args()
 
-	#Check for required utilities
+	# Check for required utilities
 	xmllint_path = shutil.which("xmllint")
 
 	if xmllint_path is None:
 		se.print_error("Couldn't locate xmllint. Is it installed?")
 		exit(1)
 
-	#Tell xmllint to indent with tabs using an environmental variable
+	# Tell xmllint to indent with tabs using an environment variable
 	env = os.environ.copy()
 	env["XMLLINT_INDENT"] = "\t"
 
@@ -44,7 +44,7 @@ def main():
 				target_filenames.add(target)
 
 		for filename in target_filenames:
-			#If we're cleaning a directory and setting single lines, skip the colophon, which has special spacing
+			# If we're cleaning a directory and setting single lines, skip the colophon, which has special spacing
 			if args.single_lines and filename.endswith("colophon.xhtml") and os.path.isdir(target):
 				continue
 
@@ -56,15 +56,15 @@ def main():
 					processed_xhtml = processed_xhtml.replace("\n", " ")
 					processed_xhtml = regex.sub(r"\s+", " ", processed_xhtml)
 
-				#Epub3 doesn't allow named entities, so convert them to their unicode equivalents
-				#But, don't unescape the content.opf long-description accidentally
+				# Epub3 doesn't allow named entities, so convert them to their unicode equivalents
+				# But, don't unescape the content.opf long-description accidentally
 				if not filename.endswith("content.opf"):
 					processed_xhtml = html.unescape(processed_xhtml).replace("&", "&amp;")
 
-				#Remove unnecessary doctypes which can cause xmllint to hang
+				# Remove unnecessary doctypes which can cause xmllint to hang
 				processed_xhtml = regex.sub(r"<!DOCTYPE[^>]+?>", "", processed_xhtml, flags=regex.MULTILINE | regex.DOTALL)
 
-				#First, canonicalize XHTML
+				# First, canonicalize XHTML
 				result = subprocess.run([xmllint_path, "--c14n", "-"], input=processed_xhtml.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 				processed_xhtml = result.stdout.decode()
 				error = result.stderr.decode().strip()
@@ -73,10 +73,10 @@ def main():
 					se.print_error("Couldn't parse {}; files must be in XHTML format, which is not the same as HTML\n{}".format(filename, error.replace("-:", "Line ")))
 					exit(1)
 
-				#Next, add the XML header that xmllint stripped during c14n
+				# Next, add the XML header that xmllint stripped during c14n
 				processed_xhtml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + processed_xhtml
 
-				#Next, pretty-print XML
+				# Next, pretty-print XML
 				processed_xhtml = subprocess.run([xmllint_path, "--format", "-"], input=processed_xhtml.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env).stdout.decode()
 
 				if processed_xhtml != xhtml:
diff --git a/interactive-sr b/interactive-sr
@@ -17,17 +17,17 @@ EOF
 }
 require(){ command -v $1 > /dev/null 2>&1 || { suggestion=""; if [ ! -z "$2" ]; then suggestion=" $2"; fi; die "$1 is not installed.${suggestion}"; } }
 if [ $# -eq 1 ]; then if [ "$1" = "--help" -o "$1" = "-h" ]; then usage; fi fi
-#End boilerplate
+# End boilerplate
 
 require "vim" "Try: apt-get install vim"
 
-#Store the regex
+# Store the regex
 regex="$1"
 
-#Remove the regex from the list of arguments
+# Remove the regex from the list of arguments
 shift
 
-#'set title' shows the filename in the terminal title
-#'set eventignore-=Syntax' enables syntax highlighting in all files
-#'wqa writes and quits all buffers
+# 'set title' shows the filename in the terminal title
+# 'set eventignore-=Syntax' enables syntax highlighting in all files
+# 'update' writes each changed buffer and 'qa' quits all buffers (together equivalent to 'wqa')
 vim "+silent set title" "+silent bufdo set eventignore-=Syntax | %s${regex}gce | silent update" "+silent qa" $@
diff --git a/lint b/lint
@@ -36,7 +36,7 @@ def main():
 		if args.verbose or (messages and len(args.directories) > 1):
 			print(colored(se_epub.directory, "white", attrs=["reverse"]))
 
-		#Print the table
+		# Print the table
 		if messages:
 			for message in messages:
 				if message.is_submessage:
diff --git a/modernize-spelling b/modernize-spelling
@@ -7,64 +7,6 @@ import regex
 import se
 
 
-DICTIONARY_FILE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "words")
-
-
-def main():
-	parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling.  For example, replace \"ash-tray\" with \"ashtray\".")
-	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
-	parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don't modernize hyphenation")
-	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
-	args = parser.parse_args()
-
-	try:
-		dictionary = set(line.strip().lower() for line in open(DICTIONARY_FILE_PATH))
-	except Exception:
-		se.print_error("Couldn't open words file at {}".format(DICTIONARY_FILE_PATH))
-		exit(1)
-
-	for target in args.targets:
-		target = os.path.abspath(target)
-
-		if args.verbose:
-			print("Processing {} ...".format(target), end="", flush=True)
-
-		target_filenames = set()
-		if os.path.isdir(target):
-			for root, _, filenames in os.walk(target):
-				for filename in fnmatch.filter(filenames, "*.xhtml"):
-					target_filenames.add(os.path.join(root, filename))
-		else:
-			target_filenames.add(target)
-
-
-		for filename in target_filenames:
-			with open(filename, "r+", encoding="utf-8") as file:
-				xhtml = file.read()
-				new_xhtml = xhtml
-
-				# What language are we using?
-				language = regex.search(r"<html[^>]+?xml:lang=\"([^\"]+)\"", xhtml)
-				if language is None or (language.group(1) != "en-US" and language.group(1) != "en-GB"):
-					if args.verbose:
-						print("\n\t", end="", flush=True)
-					se.print_error("No valid xml:lang attribute in <html> root.  Only en-US and en-GB are supported. File: {}".format(filename))
-					exit(1)
-
-				new_xhtml = modernize_spelling(new_xhtml, language.group(1))
-
-				if args.modernize_hyphenation:
-					new_xhtml = modernize_hyphenation(new_xhtml, dictionary)
-
-				if new_xhtml != xhtml:
-					file.seek(0)
-					file.write(new_xhtml)
-					file.truncate()
-
-		if args.verbose:
-			print(" OK")
-
-
 def modernize_hyphenation(xhtml, dictionary):
 	# Easy fix for a common case
 	xhtml = regex.sub(r"\b([Nn])ow-a-days\b", r"\1owadays", xhtml)			# now-a-days -> nowadays
@@ -86,12 +28,11 @@ def modernize_hyphenation(xhtml, dictionary):
 
 	return xhtml
 
-
 def modernize_spelling(xhtml, language):
 	# ADDING NEW WORDS TO THIS LIST:
 	# A good way to check if a word is "archaic" is to do a Google N-Gram search: https://books.google.com/ngrams/graph?case_insensitive=on&year_start=1800&year_end=2000&smoothing=3
 	# Remember that en-US and en-GB differ significantly, and just because a word might seem strange to you, doesn't mean it's not the common case in the other variant.
-	# If Google N-Gram shows that a word has declined significantly in usage in BOTH en-US and en-GB (or the SE editor makes an exception) then it may be a good candidate to add to this list.
+	# If Google N-Gram shows that a word has declined significantly in usage in BOTH en-US and en-GB (or the SE editor-in-chief makes an exception) then it may be a good candidate to add to this list.
 
 	xhtml = regex.sub(r"\b([Dd])evelope\b", r"\1evelop", xhtml)			# develope -> develop
 	xhtml = regex.sub(r"\b([Oo])ker\b", r"\1cher", xhtml)				# oker -> ocher
@@ -200,6 +141,62 @@ def modernize_spelling(xhtml, language):
 
 	return xhtml
 
+def main():
+	parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling.  For example, replace \"ash-tray\" with \"ashtray\".")
+	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
+	parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don't modernize hyphenation")
+	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
+	args = parser.parse_args()
+
+	dictionary_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data", "words")
+
+	try:
+		dictionary = set(line.strip().lower() for line in open(dictionary_file_path))
+	except Exception:
+		se.print_error("Couldn't open words file at {}".format(dictionary_file_path))
+		exit(1)
+
+	for target in args.targets:
+		target = os.path.abspath(target)
+
+		if args.verbose:
+			print("Processing {} ...".format(target), end="", flush=True)
+
+		target_filenames = set()
+		if os.path.isdir(target):
+			for root, _, filenames in os.walk(target):
+				for filename in fnmatch.filter(filenames, "*.xhtml"):
+					target_filenames.add(os.path.join(root, filename))
+		else:
+			target_filenames.add(target)
+
+
+		for filename in target_filenames:
+			with open(filename, "r+", encoding="utf-8") as file:
+				xhtml = file.read()
+				new_xhtml = xhtml
+
+				# What language are we using?
+				language = regex.search(r"<html[^>]+?xml:lang=\"([^\"]+)\"", xhtml)
+				if language is None or (language.group(1) != "en-US" and language.group(1) != "en-GB"):
+					if args.verbose:
+						print("\n\t", end="", flush=True)
+					se.print_error("No valid xml:lang attribute in <html> root.  Only en-US and en-GB are supported. File: {}".format(filename))
+					exit(1)
+
+				new_xhtml = modernize_spelling(new_xhtml, language.group(1))
+
+				if args.modernize_hyphenation:
+					new_xhtml = modernize_hyphenation(new_xhtml, dictionary)
+
+				if new_xhtml != xhtml:
+					file.seek(0)
+					file.write(new_xhtml)
+					file.truncate()
+
+		if args.verbose:
+			print(" OK")
+
 
 if __name__ == "__main__":
 	main()
diff --git a/reading-ease b/reading-ease
@@ -17,7 +17,7 @@ def get_word_count(text):
 	return len(text.split())
 
 def get_syllable_count(word):
-	#see http://eayd.in/?p=232
+	# See http://eayd.in/?p=232
 	exception_add = ["serious", "crucial"]
 	exception_del = ["fortunately", "unfortunately"]
 
@@ -26,15 +26,15 @@ def get_syllable_count(word):
 
 	pre_one = ["preach"]
 
-	syls = 0 #added syllable number
-	disc = 0 #discarded syllable number
+	syls = 0 # Added syllable number
+	disc = 0 # Discarded syllable number
 
-	#1) if letters < 3: return 1
+	# 1) if letters <= 3: return 1
 	if len(word) <= 3:
 		syls = 1
 		return syls
 
-	#2) if doesn't end with "ted" or "tes" or "ses" or "ied" or "ies", discard "es" and "ed" at the end.
+	# 2) if doesn't end with "ted" or "tes" or "ses" or "ied" or "ies", discard "es" and "ed" at the end.
 	# if it has only 1 vowel or 1 set of consecutive vowels, discard. (like "speed", "fled" etc.)
 	if word[-2:] == "es" or word[-2:] == "ed":
 		double_and_triple_1 = len(regex.findall(r"[eaoui][eaoui]", word))
@@ -44,7 +44,7 @@ def get_syllable_count(word):
 			else:
 				disc += 1
 
-	#3) discard trailing "e", except where ending is "le"
+	# 3) discard trailing "e", except where ending is "le"
 	le_except = ["whole", "mobile", "pole", "male", "female", "hale", "pale", "tale", "sale", "aisle", "whale", "while"]
 
 	if word[-1:] == "e":
@@ -54,45 +54,45 @@ def get_syllable_count(word):
 		else:
 			disc += 1
 
-	#4) check if consecutive vowels exists, triplets or pairs, count them as one.
+	# 4) check if consecutive vowels exist, triplets or pairs, count them as one.
 	double_and_triple = len(regex.findall(r"[eaoui][eaoui]", word))
 	tripple = len(regex.findall(r"[eaoui][eaoui][eaoui]", word))
 	disc += double_and_triple + tripple
 
-	#5) count remaining vowels in word.
+	# 5) count remaining vowels in word.
 	num_vowels = len(regex.findall(r"[eaoui]", word))
 
-	#6) add one if starts with "mc"
+	# 6) add one if starts with "mc"
 	if word[:2] == "mc":
 		syls += 1
 
-	#7) add one if ends with "y" but is not surrouned by vowel
+	# 7) add one if ends with "y" but is not preceded by a vowel
 	if word[-1:] == "y" and word[-2] not in "aeoui":
 		syls += 1
 
-	#8) add one if "y" is surrounded by non-vowels and is not in the last word.
+	# 8) add one if "y" is surrounded by non-vowels and is not the first or last letter of the word.
 	for i, j in enumerate(word):
 		if j == "y":
 			if (i != 0) and (i != len(word) - 1):
 				if word[i - 1] not in "aeoui" and word[i + 1] not in "aeoui":
 					syls += 1
 
-	#9) if starts with "tri-" or "bi-" and is followed by a vowel, add one.
+	# 9) if starts with "tri-" or "bi-" and is followed by a vowel, add one.
 	if word[:3] == "tri" and word[3] in "aeoui":
 		syls += 1
 
 	if word[:2] == "bi" and word[2] in "aeoui":
 		syls += 1
 
-	#10) if ends with "-ian", should be counted as two syllables, except for "-tian" and "-cian"
+	# 10) if ends with "-ian", should be counted as two syllables, except for "-tian" and "-cian"
 	if word[-3:] == "ian":
 	#and (word[-4:] != "cian" or word[-4:] != "tian"):
 		if word[-4:] == "cian" or word[-4:] == "tian":
 			pass
 		else:
 			syls += 1
 
-	#11) if starts with "co-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly.
+	# 11) if starts with "co-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly.
 	if word[:2] == "co" and word[2] in "eaoui":
 
 		if word[:4] in co_two or word[:5] in co_two or word[:6] in co_two:
@@ -102,14 +102,14 @@ def get_syllable_count(word):
 		else:
 			syls += 1
 
-	#12) if starts with "pre-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly.
+	# 12) if starts with "pre-" and is followed by a vowel, check if exists in the double syllable dictionary, if not, check if in single dictionary and act accordingly.
 	if word[:3] == "pre" and word[3] in "eaoui":
 		if word[:6] in pre_one:
 			pass
 		else:
 			syls += 1
 
-	#13) check for "-n't" and cross match with dictionary to add syllable.
+	# 13) check for "-n't" and cross match with dictionary to add syllable.
 	negative = ["doesn't", "isn't", "shouldn't", "couldn't", "wouldn't"]
 
 	if word[-3:] == "n't":
@@ -118,14 +118,14 @@ def get_syllable_count(word):
 		else:
 			pass
 
-	#14) Handling the exceptional words.
+	# 14) Handling the exceptional words.
 	if word in exception_del:
 		disc += 1
 
 	if word in exception_add:
 		syls += 1
 
-	# calculate the output
+	# Calculate the output
 	return num_vowels - disc + syls
 
 def main():
diff --git a/semanticate b/semanticate
@@ -33,7 +33,7 @@ def main():
 				xhtml = file.read()
 				processed_xhtml = xhtml
 
-				#Some common abbreviations
+				# Some common abbreviations
 				processed_xhtml = regex.sub(r"(?<!\<abbr\>)Mr\.", r"<abbr>Mr.</abbr>", processed_xhtml)
 				processed_xhtml = regex.sub(r"(?<!\<abbr\>)Mrs\.", r"<abbr>Mrs.</abbr>", processed_xhtml)
 				processed_xhtml = regex.sub(r"(?<!\<abbr\>)Ms\.", r"<abbr>Ms.</abbr>", processed_xhtml)
@@ -77,21 +77,21 @@ def main():
 				processed_xhtml = regex.sub(r"""(?<!\<abbr class="era"\>)B\.?C""", r"""<abbr class="era">BC</abbr>""", processed_xhtml)
 				processed_xhtml = regex.sub(r"""(?<!\<abbr class="time"\>)([ap])\.\s?m\.""", r"""<abbr class="time">\1.m.</abbr>""", processed_xhtml)
 
-				#Guess at adding eoc class
+				# Guess at adding eoc class
 				processed_xhtml = regex.sub(r"""<abbr>([a-zA-Z\.]+?\.)</abbr></p>""", r"""<abbr class="eoc">\1</abbr></p>""", processed_xhtml)
 				processed_xhtml = regex.sub(r"""<abbr>etc\.</abbr>(\s+[A-Z])""", r"""<abbr class="eoc">etc.</abbr>\1""", processed_xhtml)
 
-				#Clean up nesting errors
+				# Clean up nesting errors
 				processed_xhtml = regex.sub(r"""<abbr class="eoc"><abbr>([^<]+)</abbr></abbr>""", r"""<abbr class="eoc">\1</abbr>""", processed_xhtml)
 
-				#Get Roman numerals >= 2 characters
-				#We only wrap these if they're standalone (i.e. not already wrapped in a tag) to prevent recursion in multiple runs
+				# Get Roman numerals >= 2 characters
+				# We only wrap these if they're standalone (i.e. not already wrapped in a tag) to prevent recursion in multiple runs
 				processed_xhtml = regex.sub(r"([^a-zA-Z>])([ixvIXV]{2,})(\b)", r"""\1<span epub:type="z3998:roman">\2</span>\3""", processed_xhtml)
 
-				#Get Roman numerals that are X or V and single characters.  We can't do I for obvious reasons.
+				# Get Roman numerals that are X or V and single characters.  We can't do I for obvious reasons.
 				processed_xhtml = regex.sub(r"""([^a-zA-Z>\"])([vxVX])(\b)""", r"""\1<span epub:type="z3998:roman">\2</span>\3""", processed_xhtml)
 
-				#We may have added HTML tags within title tags.  Remove those here
+				# We may have added HTML tags within title tags.  Remove those here
 				soup = BeautifulSoup(processed_xhtml, "lxml")
 				processed_xhtml = regex.sub(r"<title>.+?</title>", "<title>" +  soup.title.text + "</title>", processed_xhtml)
 
diff --git a/split-file b/split-file
diff --git a/word-count b/word-count