#!/usr/bin/env python3
'''
    Copyright (C) 2017 Povilas Kanapickas <[email protected]>

    This file is part of cppreference-doc

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see http://www.gnu.org/licenses/.
'''

import argparse
import json
import urllib.parse
import urllib.request

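# This script enumerates all pages in the requested namespaces through the
# MediaWiki API, then downloads them in one XML dump via Special:Export.
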
def retrieve_page_names(root, ns_index):
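    """Return the titles of all pages in the namespace ns_index.

    Pages are fetched through the MediaWiki 'allpages' API query in
    batches of 500, following the legacy 'query-continue' continuation
    format until no continuation token is returned.
    """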
    begin = None
    pages = []

    while True:
        params = {
            'action': 'query',
            'list': 'allpages',
            'apnamespace': ns_index,
            'aplimit': 500,
            'format': 'json'
        }
        if begin is not None:
            params['apcontinue'] = begin

        url = "{0}/api.php?{1}".format(root, urllib.parse.urlencode(params))

        with urllib.request.urlopen(url) as f:
            data = json.loads(f.read().decode('utf-8'))
        pages += [p['title'] for p in data['query']['allpages']]

        # 'query-continue' carries the title the next batch should start
        # from; its absence means the last batch has been received.
        if ('query-continue' in data and 'allpages' in data['query-continue'] and
            'apcontinue' in data['query-continue']['allpages']):
            begin = data['query-continue']['allpages']['apcontinue']
        else:
            return pages

def export_pages(root, pages, output_path):
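    """Save the given pages to output_path as a single XML dump.

    The dump is requested from the wiki's Special:Export page; 'curonly'
    restricts it to the current revision of each page.
    """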
    params = {
        'wpDownload': '',
        'curonly': 1,
        'pages': '\n'.join(pages)
    }

    data = urllib.parse.urlencode(params).encode('ascii')
    url = "{0}/index.php?title=Special:Export&action=submit".format(root)

    # Supplying 'data' makes urlretrieve issue a POST request.
    urllib.request.urlretrieve(url, output_path, data=data)

def main():
    parser = argparse.ArgumentParser(prog='export.py')
    parser.add_argument('--url', type=str, required=True,
                        help='The URL to the root of the MediaWiki installation')
    parser.add_argument('output_path', type=str,
                        help='The path of the XML file to save the output to')
    parser.add_argument('ns_index', type=str, nargs='+',
                        help='The indices of the namespaces to retrieve')
    args = parser.parse_args()

    pages = []
    for ns_index in args.ns_index:
        new_pages = retrieve_page_names(args.url, ns_index)
        print("Retrieved {0} pages for namespace {1}".format(len(new_pages),
                                                             ns_index))
        pages += new_pages

    pages = sorted(pages)
    export_pages(args.url, pages, args.output_path)

if __name__ == "__main__":
    main()
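
# Example invocation (the URL is illustrative; use the target wiki's root):
#   ./export.py --url https://en.cppreference.com/mwiki cppreference.xml 0 4
# This exports every page in namespaces 0 (main) and 4 (project) into
# cppreference.xml.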