diff --git a/rethinkdb/__init__.py b/rethinkdb/__init__.py index 055c7dfc..49eef611 100644 --- a/rethinkdb/__init__.py +++ b/rethinkdb/__init__.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import imp +import os + import pkg_resources from rethinkdb import errors, version - # The builtins here defends against re-importing something obscuring `object`. try: import __builtin__ as builtins # Python 2 @@ -25,7 +25,7 @@ import builtins # Python 3 -__all__ = ['RethinkDB'] + errors.__all__ +__all__ = ["RethinkDB"] + errors.__all__ __version__ = version.VERSION @@ -41,7 +41,7 @@ def __init__(self): _restore, ast, query, - net + net, ) self._dump = _dump @@ -65,15 +65,17 @@ def set_loop_type(self, library=None): # find module file manager = pkg_resources.ResourceManager() - libPath = '%(library)s_net/net_%(library)s.py' % {'library': library} + libPath = "%(library)s_net/net_%(library)s.py" % {"library": library} if not manager.resource_exists(__name__, libPath): - raise ValueError('Unknown loop type: %r' % library) + raise ValueError("Unknown loop type: %r" % library) # load the module modulePath = manager.resource_filename(__name__, libPath) - moduleName = 'net_%s' % library - moduleFile, pathName, desc = imp.find_module(moduleName, [os.path.dirname(modulePath)]) - module = imp.load_module('rethinkdb.' + moduleName, moduleFile, pathName, desc) + moduleName = "net_%s" % library + moduleFile, pathName, desc = imp.find_module( + moduleName, [os.path.dirname(modulePath)] + ) + module = imp.load_module("rethinkdb." + moduleName, moduleFile, pathName, desc) # set the connection type self.connection_type = module.Connection diff --git a/rethinkdb/__main__.py b/rethinkdb/__main__.py index 2f08a437..fb0c670f 100644 --- a/rethinkdb/__main__.py +++ b/rethinkdb/__main__.py @@ -17,7 +17,7 @@ # This file incorporates work covered by the following copyright: # Copyright 2010-2016 RethinkDB, all rights reserved. -'''Dispatcher for interactive functions such as repl and backup''' +"""Dispatcher for interactive functions such as repl and backup""" import code import sys @@ -27,68 +27,87 @@ def startInterpreter(argv=None, prog=None): - repl_variables = {'r': net.Connection._r, 'rethinkdb': net.Connection._r} - banner = 'The RethinkDB driver has been imported as `r`.' + repl_variables = {"r": net.Connection._r, "rethinkdb": net.Connection._r} + banner = "The RethinkDB driver has been imported as `r`." # -- get host/port setup # - parse command line parser = utils_common.CommonOptionsParser( - prog=prog, description='An interactive Python shell (repl) with the RethinkDB driver imported') - options, args = parser.parse_args(argv if argv is not None else sys.argv[1:], connect=False) + prog=prog, + description="An interactive Python shell (repl) with the RethinkDB driver imported", + ) + options, args = parser.parse_args( + argv if argv is not None else sys.argv[1:], connect=False + ) if args: - parser.error('No positional arguments supported. Unrecognized option(s): %s' % args) + parser.error( + "No positional arguments supported. 
Unrecognized option(s): %s" % args + ) # -- open connection try: - repl_variables['conn'] = options.retryQuery.conn() - repl_variables['conn'].repl() - banner += ''' + repl_variables["conn"] = options.retryQuery.conn() + repl_variables["conn"].repl() + banner += """ A connection to %s:%d has been established as `conn` - and can be used by calling `run()` on a query without any arguments.''' % (options.hostname, options.driver_port) + and can be used by calling `run()` on a query without any arguments.""" % ( + options.hostname, + options.driver_port, + ) except errors.ReqlDriverError as e: - banner += '\nWarning: %s' % str(e) + banner += "\nWarning: %s" % str(e) if options.debug: - banner += '\n' + traceback.format_exc() + banner += "\n" + traceback.format_exc() # -- start interpreter - code.interact(banner=banner + '\n==========', local=repl_variables) + code.interact(banner=banner + "\n==========", local=repl_variables) -if __name__ == '__main__': +if __name__ == "__main__": if __package__ is None: - __package__ = 'rethinkdb' + __package__ = "rethinkdb" # -- figure out which mode we are in - modes = ['dump', 'export', 'import', 'index_rebuild', 'repl', 'restore'] + modes = ["dump", "export", "import", "index_rebuild", "repl", "restore"] if len(sys.argv) < 2 or sys.argv[1] not in modes: - sys.exit('ERROR: Must be called with one of the following verbs: %s' % ', '.join(modes)) + sys.exit( + "ERROR: Must be called with one of the following verbs: %s" + % ", ".join(modes) + ) verb = sys.argv[1] - prog = 'python -m rethinkdb' - if sys.version_info < (2, 7) or (sys.version_info >= (3, 0) and sys.version_info < (3, 4)): - prog += '.__main__' # Python versions 2.6, 3.0, 3.1 and 3.3 do not support running packages - prog += ' ' + verb + prog = "python -m rethinkdb" + if sys.version_info < (2, 7) or ( + sys.version_info >= (3, 0) and sys.version_info < (3, 4) + ): + prog += ".__main__" # Python versions 2.6, 3.0, 3.1 and 3.3 do not support running packages + prog += " " + verb argv = sys.argv[2:] - if verb == 'dump': + if verb == "dump": from . import _dump + exit(_dump.main(argv, prog=prog)) - elif verb == 'export': + elif verb == "export": from . import _export + exit(_export.main(argv, prog=prog)) - elif verb == 'import': + elif verb == "import": from . import _import + exit(_import.main(argv, prog=prog)) - elif verb == 'index_rebuild': + elif verb == "index_rebuild": from . import _index_rebuild + exit(_index_rebuild.main(argv, prog=prog)) - elif verb == 'repl': + elif verb == "repl": startInterpreter(argv, prog=prog) - elif verb == 'restore': + elif verb == "restore": from . import _restore + exit(_restore.main(argv, prog=prog)) diff --git a/rethinkdb/_dump.py b/rethinkdb/_dump.py index 0160441d..ec8a714b 100755 --- a/rethinkdb/_dump.py +++ b/rethinkdb/_dump.py @@ -18,7 +18,7 @@ # Copyright 2010-2016 RethinkDB, all rights reserved. -'''`rethinkdb-dump` creates an archive of data from a RethinkDB cluster''' +"""`rethinkdb-dump` creates an archive of data from a RethinkDB cluster""" from __future__ import print_function @@ -35,9 +35,11 @@ from rethinkdb import _export, utils_common from rethinkdb.logger import default_logger -usage = "rethinkdb dump [-c HOST:PORT] [-p] [--password-file FILENAME] [--tls-cert FILENAME] [-f FILE] " \ - "[--clients NUM] [-e (DB | DB.TABLE)]..." -help_epilog = ''' +usage = ( + "rethinkdb dump [-c HOST:PORT] [-p] [--password-file FILENAME] [--tls-cert FILENAME] [-f FILE] " + "[--clients NUM] [-e (DB | DB.TABLE)]..." 
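# The __main__ dispatcher above routes each verb to the corresponding
# submodule's main(argv, prog); a minimal sketch of that routing as a table
# ("repl" is handled separately by startInterpreter above). The argv/prog
# values shown are illustrative assumptions.
from rethinkdb import _dump, _export, _import, _index_rebuild, _restore

DISPATCH = {
    "dump": _dump.main,
    "export": _export.main,
    "import": _import.main,
    "index_rebuild": _index_rebuild.main,
    "restore": _restore.main,
}
# e.g. DISPATCH["dump"](["-f", "backup.tar.gz"], prog="python -m rethinkdb dump")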
+) +help_epilog = """ EXAMPLES: rethinkdb dump -c mnemosyne:39500 Archive all data from a cluster running on host 'mnemosyne' with a client port at 39500. @@ -46,11 +48,13 @@ Archive only the 'test' database from a local cluster into a named file. rethinkdb dump -c hades -e test.subscribers -p - Archive a specific table from a cluster running on host 'hades' which requires a password.''' + Archive a specific table from a cluster running on host 'hades' which requires a password.""" def parse_options(argv, prog=None): - parser = utils_common.CommonOptionsParser(usage=usage, epilog=help_epilog, prog=prog) + parser = utils_common.CommonOptionsParser( + usage=usage, epilog=help_epilog, prog=prog + ) parser.add_option( "-f", @@ -58,52 +62,67 @@ def parse_options(argv, prog=None): dest="out_file", metavar="FILE", default=None, - help='file to write archive to (defaults to rethinkdb_dump_DATE_TIME.tar.gz);\nif FILE is -, use standard ' - 'output (note that intermediate files will still be written to the --temp-dir directory)') + help="file to write archive to (defaults to rethinkdb_dump_DATE_TIME.tar.gz);\nif FILE is -, use standard " + "output (note that intermediate files will still be written to the --temp-dir directory)", + ) parser.add_option( "-e", "--export", dest="db_tables", metavar="DB|DB.TABLE", default=[], - type='db_table', - help='limit dump to the given database or table (may be specified multiple times)', - action="append") + type="db_table", + help="limit dump to the given database or table (may be specified multiple times)", + action="append", + ) - parser.add_option("--temp-dir", dest="temp_dir", metavar="directory", default=None, - help='the directory to use for intermediary results') + parser.add_option( + "--temp-dir", + dest="temp_dir", + metavar="directory", + default=None, + help="the directory to use for intermediary results", + ) parser.add_option( "--overwrite-file", dest="overwrite", default=False, help="overwrite -f/--file if it exists", - action="store_true") + action="store_true", + ) parser.add_option( "--clients", dest="clients", metavar="NUM", default=3, - help='number of tables to export simultaneously (default: 3)', - type="pos_int") + help="number of tables to export simultaneously (default: 3)", + type="pos_int", + ) parser.add_option( "--read-outdated", dest="outdated", default=False, - help='use outdated read mode', - action="store_true") + help="use outdated read mode", + action="store_true", + ) options, args = parser.parse_args(argv) # Check validity of arguments if len(args) != 0: - raise parser.error("No positional arguments supported. Unrecognized option(s): %s" % args) + raise parser.error( + "No positional arguments supported. 
Unrecognized option(s): %s" % args + ) # Add dump name - if platform.system() == "Windows" or platform.system().lower().startswith('cygwin'): + if platform.system() == "Windows" or platform.system().lower().startswith("cygwin"): options.dump_name = "rethinkdb_dump_%s" % datetime.datetime.today().strftime( - "%Y-%m-%dT%H-%M-%S") # no colons in name + "%Y-%m-%dT%H-%M-%S" + ) # no colons in name else: - options.dump_name = "rethinkdb_dump_%s" % datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%S") + options.dump_name = "rethinkdb_dump_%s" % datetime.datetime.today().strftime( + "%Y-%m-%dT%H:%M:%S" + ) # Verify valid output file if options.out_file == "-": @@ -118,11 +137,16 @@ def parse_options(argv, prog=None): if os.path.exists(options.out_file) and not options.overwrite: parser.error("Output file already exists: %s" % options.out_file) if os.path.exists(options.out_file) and not os.path.isfile(options.out_file): - parser.error("There is a non-file at the -f/--file location: %s" % options.out_file) + parser.error( + "There is a non-file at the -f/--file location: %s" % options.out_file + ) # Verify valid client count if options.clients < 1: - raise RuntimeError("Error: invalid number of clients (%d), must be greater than zero" % options.clients) + raise RuntimeError( + "Error: invalid number of clients (%d), must be greater than zero" + % options.clients + ) # Make sure the temporary directory exists and is accessible if options.temp_dir is not None: @@ -130,9 +154,14 @@ def parse_options(argv, prog=None): try: os.makedirs(options.temp_dir) except OSError: - parser.error("Could not create temporary directory: %s" % options.temp_dir) + parser.error( + "Could not create temporary directory: %s" % options.temp_dir + ) if not os.path.isdir(options.temp_dir): - parser.error("Temporary directory doesn't exist or is not a directory: %s" % options.temp_dir) + parser.error( + "Temporary directory doesn't exist or is not a directory: %s" + % options.temp_dir + ) if not os.access(options.temp_dir, os.W_OK): parser.error("Temporary directory inaccessible: %s" % options.temp_dir) @@ -144,10 +173,12 @@ def main(argv=None, prog=None): try: if not options.quiet: # Print a warning about the capabilities of dump, so no one is confused (hopefully) - print("""\ + print( + """\ NOTE: 'rethinkdb-dump' saves data, secondary indexes, and write hooks, but does *not* save cluster metadata. 
You will need to recreate your cluster setup yourself after - you run 'rethinkdb-restore'.""") + you run 'rethinkdb-restore'.""" + ) try: start_time = time.time() @@ -158,7 +189,7 @@ def main(argv=None, prog=None): options.directory = os.path.realpath(tempfile.mkdtemp(dir=options.temp_dir)) options.fields = None options.delimiter = None - options.format = 'json' + options.format = "json" # -- export to a directory @@ -171,7 +202,7 @@ def main(argv=None, prog=None): default_logger.exception(exc) if options.debug: - sys.stderr.write('\n%s\n' % traceback.format_exc()) + sys.stderr.write("\n%s\n" % traceback.format_exc()) raise Exception("Error: export failed, %s" % exc) @@ -181,14 +212,17 @@ def main(argv=None, prog=None): print(" Zipping export directory...") try: - if hasattr(options.out_file, 'read'): + if hasattr(options.out_file, "read"): archive = tarfile.open(fileobj=options.out_file, mode="w:gz") else: archive = tarfile.open(name=options.out_file, mode="w:gz") for curr, _, files in os.walk(os.path.realpath(options.directory)): for data_file in files: full_path = os.path.join(options.directory, curr, data_file) - archive_path = os.path.join(options.dump_name, os.path.relpath(full_path, options.directory)) + archive_path = os.path.join( + options.dump_name, + os.path.relpath(full_path, options.directory), + ) archive.add(full_path, arcname=archive_path) os.unlink(full_path) finally: @@ -199,12 +233,14 @@ def main(argv=None, prog=None): if not options.quiet: print( - "Done (%.2f seconds): %s" % - (time.time() - - start_time, - options.out_file.name if hasattr( - options.out_file, - 'name') else options.out_file)) + "Done (%.2f seconds): %s" + % ( + time.time() - start_time, + options.out_file.name + if hasattr(options.out_file, "name") + else options.out_file, + ) + ) except KeyboardInterrupt: time.sleep(0.2) raise RuntimeError("Interrupted") diff --git a/rethinkdb/_export.py b/rethinkdb/_export.py index 28f6f371..01bae2f4 100755 --- a/rethinkdb/_export.py +++ b/rethinkdb/_export.py @@ -50,8 +50,10 @@ [-e (DB | DB.TABLE)]... [--format (csv | json | ndjson)] [--fields FIELD,FIELD...] [--delimiter CHARACTER] [--clients NUM]""" -help_description = '`rethinkdb export` exports data from a RethinkDB cluster into a directory' -help_epilog = ''' +help_description = ( + "`rethinkdb export` exports data from a RethinkDB cluster into a directory" +) +help_epilog = """ EXAMPLES: rethinkdb export -c mnemosyne:39500 Export all data from a cluster running on host 'mnemosyne' with a client port at 39500. @@ -68,60 +70,84 @@ rethinkdb export --fields id,value -e test.data Export a specific table from a local cluster in JSON format with only the fields 'id' and 'value'. 
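# A minimal sketch of how the dump flow above packages the temporary export
# directory into a gzipped tarball, mirroring the os.walk/tarfile loop in
# _dump.main; the directory and archive names here are assumed for illustration.
import os
import tarfile

export_dir = "rethinkdb_export_tmp"                 # assumed temporary export directory
dump_name = "rethinkdb_dump_2019-01-01T00-00-00"    # assumed archive prefix
with tarfile.open("rethinkdb_dump.tar.gz", mode="w:gz") as archive:
    for curr, _, files in os.walk(export_dir):
        for data_file in files:
            full_path = os.path.join(curr, data_file)
            archive_path = os.path.join(dump_name, os.path.relpath(full_path, export_dir))
            archive.add(full_path, arcname=archive_path)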
-''' +""" def parse_options(argv, prog=None): - if platform.system() == "Windows" or platform.system().lower().startswith('cygwin'): + if platform.system() == "Windows" or platform.system().lower().startswith("cygwin"): # no colons in name - default_dir = "rethinkdb_export_%s" % datetime.datetime.today().strftime("%Y-%m-%dT%H-%M-%S") + default_dir = "rethinkdb_export_%s" % datetime.datetime.today().strftime( + "%Y-%m-%dT%H-%M-%S" + ) else: # " - default_dir = "rethinkdb_export_%s" % datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%S") + default_dir = "rethinkdb_export_%s" % datetime.datetime.today().strftime( + "%Y-%m-%dT%H:%M:%S" + ) - parser = utils_common.CommonOptionsParser(usage=usage, description=help_description, epilog=help_epilog, prog=prog) + parser = utils_common.CommonOptionsParser( + usage=usage, description=help_description, epilog=help_epilog, prog=prog + ) - parser.add_option("-d", "--directory", dest="directory", metavar="DIRECTORY", default=default_dir, - help='directory to output to (default: rethinkdb_export_DATE_TIME)', type="new_file") + parser.add_option( + "-d", + "--directory", + dest="directory", + metavar="DIRECTORY", + default=default_dir, + help="directory to output to (default: rethinkdb_export_DATE_TIME)", + type="new_file", + ) parser.add_option( "-e", "--export", dest="db_tables", metavar="DB|DB.TABLE", default=[], - help='limit dump to the given database or table (may be specified multiple times)', + help="limit dump to the given database or table (may be specified multiple times)", action="append", - type="db_table") - parser.add_option("--fields", dest="fields", metavar=",...", default=None, - help='export only specified fields (required for CSV format)') + type="db_table", + ) + parser.add_option( + "--fields", + dest="fields", + metavar=",...", + default=None, + help="export only specified fields (required for CSV format)", + ) parser.add_option( "--format", dest="format", metavar="json|csv|ndjson", default="json", - help='format to write (defaults to json. ndjson is newline delimited json.)', + help="format to write (defaults to json. ndjson is newline delimited json.)", type="choice", - choices=[ - 'json', - 'csv', - 'ndjson']) + choices=["json", "csv", "ndjson"], + ) parser.add_option( "--clients", dest="clients", metavar="NUM", default=3, - help='number of tables to export simultaneously (default: 3)', - type="pos_int") + help="number of tables to export simultaneously (default: 3)", + type="pos_int", + ) parser.add_option( "--read-outdated", dest="outdated", default=False, - help='use outdated read mode', - action="store_true") - - csvGroup = optparse.OptionGroup(parser, 'CSV options') - csvGroup.add_option("--delimiter", dest="delimiter", metavar="CHARACTER", default=None, - help="character to be used as field delimiter, or '\\t' for tab (default: ',')") + help="use outdated read mode", + action="store_true", + ) + + csvGroup = optparse.OptionGroup(parser, "CSV options") + csvGroup.add_option( + "--delimiter", + dest="delimiter", + metavar="CHARACTER", + default=None, + help="character to be used as field delimiter, or '\\t' for tab (default: ',')", + ) parser.add_option_group(csvGroup) options, args = parser.parse_args(argv) @@ -129,11 +155,15 @@ def parse_options(argv, prog=None): # -- Check validity of arguments if len(args) != 0: - parser.error("No positional arguments supported. Unrecognized option(s): %s" % args) + parser.error( + "No positional arguments supported. 
Unrecognized option(s): %s" % args + ) if options.fields: if len(options.db_tables) != 1 or options.db_tables[0].table is None: - parser.error("The --fields option can only be used when exporting a single table") + parser.error( + "The --fields option can only be used when exporting a single table" + ) options.fields = options.fields.split(",") # - format specific validation @@ -147,7 +177,10 @@ def parse_options(argv, prog=None): elif options.delimiter == "\\t": options.delimiter = "\t" elif len(options.delimiter) != 1: - parser.error("Specify exactly one character for the --delimiter option: %s" % options.delimiter) + parser.error( + "Specify exactly one character for the --delimiter option: %s" + % options.delimiter + ) else: if options.delimiter: parser.error("--delimiter option is only valid for CSV file formats") @@ -212,12 +245,12 @@ def csv_writer(filename, fields, delimiter, task_queue, error_queue): elif isinstance(row[field], str): info.append(row[field]) elif isinstance(row[field], unicode): - info.append(row[field].encode('utf-8')) + info.append(row[field].encode("utf-8")) else: if str == unicode: info.append(json.dumps(row[field])) else: - info.append(json.dumps(row[field]).encode('utf-8')) + info.append(json.dumps(row[field]).encode("utf-8")) out_writer.writerow(info) item = task_queue.get() except BaseException: @@ -229,37 +262,52 @@ def csv_writer(filename, fields, delimiter, task_queue, error_queue): pass -def export_table(db, table, directory, options, error_queue, progress_info, sindex_counter, hook_counter, exit_event): - signal.signal(signal.SIGINT, signal.SIG_DFL) # prevent signal handlers from being set in child processes +def export_table( + db, + table, + directory, + options, + error_queue, + progress_info, + sindex_counter, + hook_counter, + exit_event, +): + signal.signal( + signal.SIGINT, signal.SIG_DFL + ) # prevent signal handlers from being set in child processes writer = None - has_write_hooks = utils_common.check_minimum_version(options, '2.3.7', False) + has_write_hooks = utils_common.check_minimum_version(options, "2.3.7", False) try: # -- get table info - table_info = options.retryQuery('table info: %s.%s' % (db, table), query.db(db).table(table).info()) + table_info = options.retryQuery( + "table info: %s.%s" % (db, table), query.db(db).table(table).info() + ) # Rather than just the index names, store all index information - table_info['indexes'] = options.retryQuery( - 'table index data %s.%s' % (db, table), + table_info["indexes"] = options.retryQuery( + "table index data %s.%s" % (db, table), query.db(db).table(table).index_status(), - run_options={'binary_format': 'raw'} + run_options={"binary_format": "raw"}, ) sindex_counter.value += len(table_info["indexes"]) if has_write_hooks: - table_info['write_hook'] = options.retryQuery( - 'table write hook data %s.%s' % (db, table), + table_info["write_hook"] = options.retryQuery( + "table write hook data %s.%s" % (db, table), query.db(db).table(table).get_write_hook(), - run_options={'binary_format': 'raw'}) + run_options={"binary_format": "raw"}, + ) - if table_info['write_hook'] is not None: + if table_info["write_hook"] is not None: hook_counter.value += 1 - with open(os.path.join(directory, db, table + '.info'), 'w') as info_file: + with open(os.path.join(directory, db, table + ".info"), "w") as info_file: info_file.write(json.dumps(table_info) + "\n") with sindex_counter.get_lock(): sindex_counter.value += len(table_info["indexes"]) @@ -280,7 +328,9 @@ def export_table(db, table, directory, 
options, error_queue, progress_info, sind options.fields, task_queue, error_queue, - options.format)) + options.format, + ), + ) elif options.format == "csv": filename = directory + "/%s/%s.csv" % (db, table) writer = multiprocessing.Process( @@ -290,7 +340,9 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind options.fields, options.delimiter, task_queue, - error_queue)) + error_queue, + ), + ) elif options.format == "ndjson": filename = directory + "/%s/%s.ndjson" % (db, table) writer = multiprocessing.Process( @@ -300,7 +352,9 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind options.fields, task_queue, error_queue, - options.format)) + options.format, + ), + ) else: raise RuntimeError("unknown format type: %s" % options.format) writer.start() @@ -311,16 +365,13 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind lastPrimaryKey = None read_rows = 0 - run_options = { - "time_format": "raw", - "binary_format": "raw" - } + run_options = {"time_format": "raw", "binary_format": "raw"} if options.outdated: run_options["read_mode"] = "outdated" cursor = options.retryQuery( - 'inital cursor for %s.%s' % (db, table), + "inital cursor for %s.%s" % (db, table), query.db(db).table(table).order_by(index=table_info["primary_key"]), - run_options=run_options + run_options=run_options, ) while not exit_event.is_set(): try: @@ -352,13 +403,22 @@ def export_table(db, table, directory, options, error_queue, progress_info, sind default_logger.exception(exc) cursor = options.retryQuery( - 'backup cursor for %s.%s' % - (db, table), query.db(db).table(table).between( - lastPrimaryKey, query.maxval, left_bound="open").order_by( - index=table_info["primary_key"]), run_options=run_options) + "backup cursor for %s.%s" % (db, table), + query.db(db) + .table(table) + .between(lastPrimaryKey, query.maxval, left_bound="open") + .order_by(index=table_info["primary_key"]), + run_options=run_options, + ) except (errors.ReqlError, errors.ReqlDriverError) as ex: - error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2]))) + error_queue.put( + ( + RuntimeError, + RuntimeError(ex.message), + traceback.extract_tb(sys.exc_info()[2]), + ) + ) except BaseException: ex_type, ex_class, tb = sys.exc_info() error_queue.put((ex_type, ex_class, traceback.extract_tb(tb))) @@ -372,6 +432,7 @@ def abort_export(signum, frame, exit_event, interrupt_event): interrupt_event.set() exit_event.set() + # We sum up the row count from all tables for total percentage completion # This is because table exports can be staggered when there are not enough clients # to export all of them at once. 
As a result, the progress bar will not necessarily @@ -409,7 +470,9 @@ def run_clients(options, workingDir, db_table_set): sindex_counter = multiprocessing.Value(ctypes.c_longlong, 0) hook_counter = multiprocessing.Value(ctypes.c_longlong, 0) - signal.signal(signal.SIGINT, lambda a, b: abort_export(a, b, exit_event, interrupt_event)) + signal.signal( + signal.SIGINT, lambda a, b: abort_export(a, b, exit_event, interrupt_event) + ) errors = [] try: @@ -417,19 +480,32 @@ def run_clients(options, workingDir, db_table_set): arg_lists = [] for db, table in db_table_set: - tableSize = int(options.retryQuery("count", query.db(db).table(table).info()['doc_count_estimates'].sum())) - - progress_info.append((multiprocessing.Value(ctypes.c_longlong, 0), - multiprocessing.Value(ctypes.c_longlong, tableSize))) - arg_lists.append((db, table, - workingDir, - options, - error_queue, - progress_info[-1], - sindex_counter, - hook_counter, - exit_event, - )) + tableSize = int( + options.retryQuery( + "count", + query.db(db).table(table).info()["doc_count_estimates"].sum(), + ) + ) + + progress_info.append( + ( + multiprocessing.Value(ctypes.c_longlong, 0), + multiprocessing.Value(ctypes.c_longlong, tableSize), + ) + ) + arg_lists.append( + ( + db, + table, + workingDir, + options, + error_queue, + progress_info[-1], + sindex_counter, + hook_counter, + exit_event, + ) + ) # Wait for all tables to finish while processes or arg_lists: @@ -442,7 +518,9 @@ def run_clients(options, workingDir, db_table_set): processes = [process for process in processes if process.is_alive()] if len(processes) < options.clients and len(arg_lists) > 0: - new_process = multiprocessing.Process(target=export_table, args=arg_lists.pop(0)) + new_process = multiprocessing.Process( + target=export_table, args=arg_lists.pop(0) + ) new_process.start() processes.append(new_process) @@ -458,12 +536,21 @@ def plural(num, text, plural_text): return "%d %s" % (num, text if num == 1 else plural_text) if not options.quiet: - print("\n %s exported from %s, with %s, and %s" % - (plural(sum([max(0, info[0].value) for info in progress_info]), "row", "rows"), - plural(len(db_table_set), "table", "tables"), - plural(sindex_counter.value, "secondary index", "secondary indexes"), - plural(hook_counter.value, "hook function", "hook functions") - )) + print( + "\n %s exported from %s, with %s, and %s" + % ( + plural( + sum([max(0, info[0].value) for info in progress_info]), + "row", + "rows", + ), + plural(len(db_table_set), "table", "tables"), + plural( + sindex_counter.value, "secondary index", "secondary indexes" + ), + plural(hook_counter.value, "hook function", "hook functions"), + ) + ) finally: signal.signal(signal.SIGINT, signal.SIG_DFL) @@ -475,33 +562,44 @@ def plural(num, text, plural_text): for error in errors: print("%s" % error[1], file=sys.stderr) if options.debug: - print("%s traceback: %s" % (error[0].__name__, error[2]), file=sys.stderr) + print( + "%s traceback: %s" % (error[0].__name__, error[2]), file=sys.stderr + ) raise RuntimeError("Errors occurred during export") def run(options): # Make sure this isn't a pre-`reql_admin` cluster - which could result in data loss # if the user has a database named 'rethinkdb' - utils_common.check_minimum_version(options, '1.6') + utils_common.check_minimum_version(options, "1.6") # get the complete list of tables db_table_set = set() - all_tables = [utils_common.DbTable(x['db'], x['name']) for x in options.retryQuery( - 'list tables', query.db('rethinkdb').table('table_config').pluck(['db', 
'name']))] + all_tables = [ + utils_common.DbTable(x["db"], x["name"]) + for x in options.retryQuery( + "list tables", + query.db("rethinkdb").table("table_config").pluck(["db", "name"]), + ) + ] if not options.db_tables: db_table_set = all_tables # default to all tables else: - all_databases = options.retryQuery('list dbs', query.db_list().filter(query.row.ne('rethinkdb'))) + all_databases = options.retryQuery( + "list dbs", query.db_list().filter(query.row.ne("rethinkdb")) + ) for db_table in options.db_tables: db, table = db_table - if db == 'rethinkdb': - raise AssertionError('Can not export tables from the system database') + if db == "rethinkdb": + raise AssertionError("Can not export tables from the system database") if db not in all_databases: raise RuntimeError("Error: Database '%s' not found" % db) - if table is None: # This is just a db name, implicitly selecting all tables in that db + if ( + table is None + ): # This is just a db name, implicitly selecting all tables in that db db_table_set.update(set([x for x in all_tables if x.db == db])) else: if utils_common.DbTable(db, table) not in all_tables: @@ -515,22 +613,27 @@ def run(options): parent_dir = os.path.dirname(options.directory) if not os.path.exists(parent_dir): if os.path.isdir(parent_dir): - raise RuntimeError("Output parent directory is not a directory: %s" % parent_dir) + raise RuntimeError( + "Output parent directory is not a directory: %s" % parent_dir + ) try: os.makedirs(parent_dir) except OSError as e: - raise optparse.OptionValueError("Unable to create parent directory for %s: %s" % (parent_dir, e.strerror)) + raise optparse.OptionValueError( + "Unable to create parent directory for %s: %s" + % (parent_dir, e.strerror) + ) working_dir = tempfile.mkdtemp( - prefix=os.path.basename( - options.directory) + - '_partial_', - dir=os.path.dirname( - options.directory)) + prefix=os.path.basename(options.directory) + "_partial_", + dir=os.path.dirname(options.directory), + ) try: for db in set([database for database, _ in db_table_set]): os.makedirs(os.path.join(working_dir, str(db))) except OSError as e: - raise RuntimeError("Failed to create temporary directory (%s): %s" % (e.filename, e.strerror)) + raise RuntimeError( + "Failed to create temporary directory (%s): %s" % (e.filename, e.strerror) + ) # Run the export run_clients(options, working_dir, db_table_set) @@ -538,14 +641,19 @@ def run(options): # Move the temporary directory structure over to the original output directory try: if os.path.isdir(options.directory): - os.rmdir(options.directory) # an empty directory is created here when using _dump + os.rmdir( + options.directory + ) # an empty directory is created here when using _dump elif os.path.exists(options.directory): - raise Exception('There was a file at the output location: %s' % options.directory) + raise Exception( + "There was a file at the output location: %s" % options.directory + ) os.rename(working_dir, options.directory) except OSError as e: raise RuntimeError( - "Failed to move temporary directory to output directory (%s): %s" % - (options.directory, e.strerror)) + "Failed to move temporary directory to output directory (%s): %s" + % (options.directory, e.strerror) + ) def main(argv=None, prog=None): diff --git a/rethinkdb/_import.py b/rethinkdb/_import.py index 6696e95a..0ce90bfc 100755 --- a/rethinkdb/_import.py +++ b/rethinkdb/_import.py @@ -17,7 +17,7 @@ # This file incorporates work covered by the following copyright: # Copyright 2010-2016 RethinkDB, all rights reserved. 
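# A minimal sketch of the table-listing pattern used by _export.run() above:
# table names are read from the `rethinkdb.table_config` system table.
# Host/port are assumptions for illustration; the real code routes this
# through options.retryQuery rather than a direct run().
from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)
all_tables = [
    (row["db"], row["name"])
    for row in r.db("rethinkdb").table("table_config").pluck(["db", "name"]).run(conn)
]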
-'''`rethinkdb import` loads data into a RethinkDB cluster''' +"""`rethinkdb import` loads data into a RethinkDB cluster""" from __future__ import print_function @@ -33,9 +33,10 @@ import sys import time import traceback -import six from multiprocessing.queues import Queue, SimpleQueue +import six + from rethinkdb import ast, errors, query, utils_common from rethinkdb.logger import default_logger @@ -87,26 +88,30 @@ class SourceFile(object): _rows_written = None def __init__( - self, - source, - db, - table, - query_runner, - primary_key=None, - indexes=None, - write_hook=None, - source_options=None): + self, + source, + db, + table, + query_runner, + primary_key=None, + indexes=None, + write_hook=None, + source_options=None, + ): if self.format is None: - raise AssertionError('{class_name} must have a format'.format(class_name=self.__class__.__name__)) - - if self.db == 'rethinkdb': - raise AssertionError('Can not import tables into the system database') + raise AssertionError( + "{class_name} must have a format".format( + class_name=self.__class__.__name__ + ) + ) + if self.db == "rethinkdb": + raise AssertionError("Can not import tables into the system database") # query_runner if not isinstance(query_runner, utils_common.RetryQuery): - raise AssertionError('Query runner is not instance of RetryQuery') + raise AssertionError("Query runner is not instance of RetryQuery") self.query_runner = query_runner @@ -119,8 +124,8 @@ def __init__( self._rows_written = multiprocessing.Value(ctypes.c_longlong, 0) # source - if hasattr(source, 'read'): - if unicode != str or 'b' in source.mode: + if hasattr(source, "read"): + if unicode != str or "b" in source.mode: # Python2.x or binary file, assume utf-8 encoding self._source = codecs.getreader("utf-8")(source) else: @@ -131,12 +136,18 @@ def __init__( self._source = codecs.open(source, mode="r", encoding="utf-8") except IOError as exc: default_logger.exception(exc) - raise ValueError('Unable to open source file "%s": %s' % (str(source), str(exc))) + raise ValueError( + 'Unable to open source file "%s": %s' % (str(source), str(exc)) + ) - if hasattr(self._source, 'name') and self._source.name and os.path.isfile(self._source.name): + if ( + hasattr(self._source, "name") + and self._source.name + and os.path.isfile(self._source.name) + ): self._bytes_size.value = os.path.getsize(source) if self._bytes_size.value == 0: - raise ValueError('Source is zero-length: %s' % source) + raise ValueError("Source is zero-length: %s" % source) # table info self.db = db @@ -147,23 +158,23 @@ def __init__( # options self.source_options = source_options or { - "create_args": { - "primary_key": self.primary_key - } + "create_args": {"primary_key": self.primary_key} } # name - if hasattr(self._source, 'name') and self._source.name: + if hasattr(self._source, "name") and self._source.name: self.name = os.path.basename(self._source.name) else: - self.name = '%s.%s' % (self.db, self.table) + self.name = "%s.%s" % (self.db, self.table) def __hash__(self): return hash((self.db, self.table)) def get_line(self): - '''Returns a single line from the file''' - raise NotImplementedError('This needs to be implemented on the %s subclass' % self.format) + """Returns a single line from the file""" + raise NotImplementedError( + "This needs to be implemented on the %s subclass" % self.format + ) # - bytes @property @@ -210,12 +221,15 @@ def add_rows_written(self, increment): # we have multiple writers to coordinate # - percent done @property def percent_done(self): - '''return a float 
between 0 and 1 for a reasonable guess of percentage complete''' + """return a float between 0 and 1 for a reasonable guess of percentage complete""" # assume that reading takes 50% of the time and writing the other 50% completed = 0.0 # of 2.0 # - add read percentage - if self._bytes_size.value <= 0 or self._bytes_size.value <= self._bytes_read.value: + if ( + self._bytes_size.value <= 0 + or self._bytes_size.value <= self._bytes_read.value + ): completed += 1.0 elif self._bytes_read.value < 0 and self._total_rows.value >= 0: # done by rows read @@ -224,7 +238,9 @@ def percent_done(self): else: # done by bytes read if self._bytes_read.value > 0: - completed += float(self._bytes_read.value) / float(self._bytes_size.value) + completed += float(self._bytes_read.value) / float( + self._bytes_size.value + ) # - add written percentage if self._rows_read.value or self._rows_written.value: @@ -233,9 +249,13 @@ def percent_done(self): completed += 1.0 elif total_rows < 0: # a guesstimate - per_row_size = float(self._bytes_read.value) / float(self._rows_read.value) - total_rows = float(self._rows_read.value) + \ - (float(self._bytes_size.value - self._bytes_read.value) / per_row_size) + per_row_size = float(self._bytes_read.value) / float( + self._rows_read.value + ) + total_rows = float(self._rows_read.value) + ( + float(self._bytes_size.value - self._bytes_read.value) + / per_row_size + ) completed += float(self._rows_written.value) / total_rows else: # accurate count @@ -245,21 +265,26 @@ def percent_done(self): return completed * 0.5 def setup_table(self): - '''Ensure that the db, table, and indexes exist and are correct''' + """Ensure that the db, table, and indexes exist and are correct""" # - ensure the table exists and is ready self.query_runner( "create table: %s.%s" % (self.db, self.table), - ast.expr([self.table]).set_difference( - query.db(self.db).table_list() - ).for_each(query.db(self.db).table_create( - query.row, **self.source_options["create_args"] if 'create_args' in self.source_options else {}) - ) + ast.expr([self.table]) + .set_difference(query.db(self.db).table_list()) + .for_each( + query.db(self.db).table_create( + query.row, + **self.source_options["create_args"] + if "create_args" in self.source_options + else {} + ) + ), ) self.query_runner( "wait for %s.%s" % (self.db, self.table), - query.db(self.db).table(self.table).wait(timeout=30) + query.db(self.db).table(self.table).wait(timeout=30), ) # - ensure that the primary key on the table is correct @@ -271,58 +296,81 @@ def setup_table(self): self.primary_key = primary_key elif primary_key != self.primary_key: raise RuntimeError( - "Error: table %s.%s primary key was `%s` rather than the expected: %s" % - (self.db, self.table, primary_key, self.primary_key)) + "Error: table %s.%s primary key was `%s` rather than the expected: %s" + % (self.db, self.table, primary_key, self.primary_key) + ) def restore_indexes(self, warning_queue): # recreate secondary indexes - dropping existing on the assumption they are wrong if self.indexes: existing_indexes = self.query_runner( - "indexes from: %s.%s" % - (self.db, self.table), query.db( - self.db).table( - self.table).index_list()) + "indexes from: %s.%s" % (self.db, self.table), + query.db(self.db).table(self.table).index_list(), + ) try: created_indexes = [] for index in self.indexes: if index["index"] in existing_indexes: # drop existing versions self.query_runner( - "drop index: %s.%s:%s" % (self.db, self.table, index["index"]), - 
query.db(self.db).table(self.table).index_drop(index["index"]) + "drop index: %s.%s:%s" + % (self.db, self.table, index["index"]), + query.db(self.db) + .table(self.table) + .index_drop(index["index"]), ) self.query_runner( - "create index: %s.%s:%s" % (self.db, self.table, index["index"]), - query.db(self.db).table(self.table).index_create(index["index"], index["function"]) + "create index: %s.%s:%s" + % (self.db, self.table, index["index"]), + query.db(self.db) + .table(self.table) + .index_create(index["index"], index["function"]), ) created_indexes.append(index["index"]) # wait for all of the created indexes to build self.query_runner( "waiting for indexes on %s.%s" % (self.db, self.table), - query.db(self.db).table(self.table).index_wait(query.args(created_indexes)) + query.db(self.db) + .table(self.table) + .index_wait(query.args(created_indexes)), ) except RuntimeError: exception_type, exception_class, trcback = sys.exc_info() - warning_queue.put((exception_type, exception_class, traceback.extract_tb(trcback), self._source.name)) + warning_queue.put( + ( + exception_type, + exception_class, + traceback.extract_tb(trcback), + self._source.name, + ) + ) if self.write_hook: self.query_runner( - "Write hook from: %s.%s" % - (self.db, self.table), query.db( - self.db).table( - self.table).get_write_hook()) + "Write hook from: %s.%s" % (self.db, self.table), + query.db(self.db).table(self.table).get_write_hook(), + ) try: self.query_runner( "drop hook: %s.%s" % (self.db, self.table), - query.db(self.db).table(self.table).set_write_hook(None) + query.db(self.db).table(self.table).set_write_hook(None), ) self.query_runner( "create hook: %s.%s:%s" % (self.db, self.table, self.write_hook), - query.db(self.db).table(self.table).set_write_hook(self.write_hook["function"]) + query.db(self.db) + .table(self.table) + .set_write_hook(self.write_hook["function"]), ) except RuntimeError: exception_type, exception_class, trcback = sys.exc_info() - warning_queue.put((exception_type, exception_class, traceback.extract_tb(trcback), self._source.name)) + warning_queue.put( + ( + exception_type, + exception_class, + traceback.extract_tb(trcback), + self._source.name, + ) + ) def batches(self, batch_size=None, warning_queue=None): @@ -336,7 +384,7 @@ def batches(self, batch_size=None, warning_queue=None): batch_size = int(batch_size) if batch_size <= 0: - raise AssertionError('Batch size can not be less than one') + raise AssertionError("Batch size can not be less than one") # setup self.setup_file(warning_queue=warning_queue) @@ -383,16 +431,19 @@ def teardown(self): pass def read_to_queue( - self, - work_queue, - exit_event, - error_queue, - warning_queue, - timing_queue, - fields=None, - ignore_signals=True, - batch_size=None): - if ignore_signals: # ToDo: work out when we are in a worker process automatically + self, + work_queue, + exit_event, + error_queue, + warning_queue, + timing_queue, + fields=None, + ignore_signals=True, + batch_size=None, + ): + if ( + ignore_signals + ): # ToDo: work out when we are in a worker process automatically signal.signal(signal.SIGINT, signal.SIG_IGN) # workers should ignore these if batch_size is None: @@ -402,7 +453,7 @@ def read_to_queue( try: timePoint = time.time() for batch in self.batches(warning_queue=warning_queue): - timing_queue.put(('reader_work', time.time() - timePoint)) + timing_queue.put(("reader_work", time.time() - timePoint)) timePoint = time.time() # apply the fields filter @@ -420,7 +471,7 @@ def read_to_queue( pass else: break - 
timing_queue.put(('reader_wait', time.time() - timePoint)) + timing_queue.put(("reader_wait", time.time() - timePoint)) timePoint = time.time() # - report relevant errors @@ -438,7 +489,7 @@ class NeedMoreData(Exception): class JsonSourceFile(SourceFile): - format = 'json' + format = "json" decoder = json.JSONDecoder() json_array = None @@ -451,7 +502,7 @@ class JsonSourceFile(SourceFile): def fill_buffer(self): if self._buffer_str is None: - self._buffer_str = '' + self._buffer_str = "" self._buffer_pos = 0 self._buffer_end = 0 elif self._buffer_pos == 0: @@ -459,22 +510,22 @@ def fill_buffer(self): if self._buffer_size == JSON_MAX_BUFFER_SIZE: raise Exception( "Error: JSON max buffer size exceeded on file %s (from position %d). Use '--max-document-size' to " - "extend your buffer." % - (self.name, self.bytes_processed)) + "extend your buffer." % (self.name, self.bytes_processed) + ) self._buffer_size = min(self._buffer_size * 2, JSON_MAX_BUFFER_SIZE) # add more data read_target = self._buffer_size - self._buffer_end + self._buffer_pos if read_target < 1: - raise AssertionError('Can not set the read target and full the buffer') + raise AssertionError("Can not set the read target and full the buffer") new_chunk = self._source.read(read_target) if len(new_chunk) == 0: raise StopIteration() # file ended - self._buffer_str = self._buffer_str[self._buffer_pos:] + new_chunk + self._buffer_str = self._buffer_str[self._buffer_pos :] + new_chunk self._bytes_read.value += len(new_chunk) # reset markers @@ -482,27 +533,37 @@ def fill_buffer(self): self._buffer_end = len(self._buffer_str) - 1 def get_line(self): - '''Return a line from the current _buffer_str, or raise NeedMoreData trying''' + """Return a line from the current _buffer_str, or raise NeedMoreData trying""" # advance over any whitespace - self._buffer_pos = json.decoder.WHITESPACE.match(self._buffer_str, self._buffer_pos).end() + self._buffer_pos = json.decoder.WHITESPACE.match( + self._buffer_str, self._buffer_pos + ).end() if self._buffer_pos >= self._buffer_end: raise NeedMoreData() # read over a comma if we are not the first item in a json_array - if self.json_array and self.found_first and self._buffer_str[self._buffer_pos] == ",": + if ( + self.json_array + and self.found_first + and self._buffer_str[self._buffer_pos] == "," + ): self._buffer_pos += 1 if self._buffer_pos >= self._buffer_end: raise NeedMoreData() # advance over any post-comma whitespace - self._buffer_pos = json.decoder.WHITESPACE.match(self._buffer_str, self._buffer_pos).end() + self._buffer_pos = json.decoder.WHITESPACE.match( + self._buffer_str, self._buffer_pos + ).end() if self._buffer_pos >= self._buffer_end: raise NeedMoreData() # parse and return an object try: - row, self._buffer_pos = self.decoder.raw_decode(self._buffer_str, idx=self._buffer_pos) + row, self._buffer_pos = self.decoder.raw_decode( + self._buffer_str, idx=self._buffer_pos + ) self.found_first = True return row except (ValueError, IndexError): @@ -526,7 +587,9 @@ def setup_file(self, warning_queue=None): elif self._buffer_str[0] == "{": self.json_array = False else: - raise ValueError("Error: JSON format not recognized - file does not begin with an object or array") + raise ValueError( + "Error: JSON format not recognized - file does not begin with an object or array" + ) except IndexError: raise ValueError("Error: JSON file was empty of content") @@ -536,23 +599,39 @@ def teardown(self): # note: fill_buffer should have guaranteed that we have only the data in the end # advance through any 
leading whitespace - self._buffer_pos = json.decoder.WHITESPACE.match(self._buffer_str, self._buffer_pos).end() + self._buffer_pos = json.decoder.WHITESPACE.match( + self._buffer_str, self._buffer_pos + ).end() # check the end of the array if we have it if self.json_array: if self._buffer_str[self._buffer_pos] != "]": - snippit = self._buffer_str[self._buffer_pos:] - extra = '' if len(snippit) <= 100 else ' and %d more characters' % (len(snippit) - 100) - raise ValueError("Error: JSON array did not end cleanly, rather with: <<%s>>%s" % - (snippit[:100], extra)) + snippit = self._buffer_str[self._buffer_pos :] + extra = ( + "" + if len(snippit) <= 100 + else " and %d more characters" % (len(snippit) - 100) + ) + raise ValueError( + "Error: JSON array did not end cleanly, rather with: <<%s>>%s" + % (snippit[:100], extra) + ) self._buffer_pos += 1 # advance through any trailing whitespace - self._buffer_pos = json.decoder.WHITESPACE.match(self._buffer_str, self._buffer_pos).end() - snippit = self._buffer_str[self._buffer_pos:] + self._buffer_pos = json.decoder.WHITESPACE.match( + self._buffer_str, self._buffer_pos + ).end() + snippit = self._buffer_str[self._buffer_pos :] if len(snippit) > 0: - extra = '' if len(snippit) <= 100 else ' and %d more characters' % (len(snippit) - 100) - raise ValueError("Error: extra data after JSON data: <<%s>>%s" % (snippit[:100], extra)) + extra = ( + "" + if len(snippit) <= 100 + else " and %d more characters" % (len(snippit) - 100) + ) + raise ValueError( + "Error: extra data after JSON data: <<%s>>%s" % (snippit[:100], extra) + ) class CsvSourceFile(SourceFile): @@ -565,21 +644,23 @@ class CsvSourceFile(SourceFile): _columns = None # name of the columns def __init__(self, *args, **kwargs): - if 'source_options' in kwargs and isinstance(kwargs['source_options'], dict): - if 'no_header_row' in kwargs['source_options']: - self.no_header_row = kwargs['source_options']['no_header_row'] - if 'custom_header' in kwargs['source_options']: - self.custom_header = kwargs['source_options']['custom_header'] + if "source_options" in kwargs and isinstance(kwargs["source_options"], dict): + if "no_header_row" in kwargs["source_options"]: + self.no_header_row = kwargs["source_options"]["no_header_row"] + if "custom_header" in kwargs["source_options"]: + self.custom_header = kwargs["source_options"]["custom_header"] super(CsvSourceFile, self).__init__(*args, **kwargs) def byte_counter(self): - '''Generator for getting a byte count on a file being used''' + """Generator for getting a byte count on a file being used""" for line in self._source: self._bytes_read.value += len(line) if unicode != str: - yield line.encode("utf-8") # Python2.x csv module does not really handle unicode + yield line.encode( + "utf-8" + ) # Python2.x csv module does not really handle unicode else: yield line @@ -596,7 +677,9 @@ def setup_file(self, warning_queue=None): # field names may override fields from the header if self.custom_header is not None: if not self.no_header_row: - warning_queue.put("Ignoring header row on %s: %s" % (self.name, str(self._columns))) + warning_queue.put( + "Ignoring header row on %s: %s" % (self.name, str(self._columns)) + ) self._columns = self.custom_header elif self.no_header_row: raise ValueError("Error: No field name information available") @@ -605,18 +688,22 @@ def get_line(self): raw_row = next(self._reader) if len(self._columns) != len(raw_row): raise Exception( - "Error: '%s' line %d has an inconsistent number of columns: %s" % - (self.name, 
self._reader.line_num, str(raw_row))) + "Error: '%s' line %d has an inconsistent number of columns: %s" + % (self.name, self._reader.line_num, str(raw_row)) + ) row = {} - for key, value in zip(self._columns, raw_row): # note: we import all csv fields as strings + for key, value in zip( + self._columns, raw_row + ): # note: we import all csv fields as strings # treat empty fields as no entry rather than empty string - if value == '': + if value == "": continue row[key] = value if str == unicode else unicode(value, encoding="utf-8") return row + # == @@ -628,7 +715,7 @@ def get_line(self): [--shards NUM_SHARDS] [--replicas NUM_REPLICAS] [--delimiter CHARACTER] [--custom-header FIELD,FIELD... [--no-header]]""" -help_epilog = ''' +help_epilog = """ EXAMPLES: rethinkdb import -d rdb_export -c mnemosyne:39500 --clients 128 @@ -651,25 +738,45 @@ def get_line(self): Import data into a local cluster using the named CSV file with no header and instead use the fields 'id', 'name', and 'number', the delimiter is a semicolon (rather than a comma). -''' +""" def parse_options(argv, prog=None): - parser = utils_common.CommonOptionsParser(usage=usage, epilog=help_epilog, prog=prog) + parser = utils_common.CommonOptionsParser( + usage=usage, epilog=help_epilog, prog=prog + ) - parser.add_option("--clients", dest="clients", metavar="CLIENTS", default=8, - help="client connections to use (default: 8)", type="pos_int") - parser.add_option("--hard-durability", dest="durability", action="store_const", default="soft", - help="use hard durability writes (slower, uses less memory)", const="hard") - parser.add_option("--force", dest="force", action="store_true", default=False, - help="import even if a table already exists, overwriting duplicate primary keys") + parser.add_option( + "--clients", + dest="clients", + metavar="CLIENTS", + default=8, + help="client connections to use (default: 8)", + type="pos_int", + ) + parser.add_option( + "--hard-durability", + dest="durability", + action="store_const", + default="soft", + help="use hard durability writes (slower, uses less memory)", + const="hard", + ) + parser.add_option( + "--force", + dest="force", + action="store_true", + default=False, + help="import even if a table already exists, overwriting duplicate primary keys", + ) parser.add_option( "--batch-size", dest="batch_size", default=utils_common.default_batch_size, help=optparse.SUPPRESS_HELP, - type="pos_int") + type="pos_int", + ) # Replication settings replication_options_group = optparse.OptionGroup(parser, "Replication Options") @@ -679,14 +786,16 @@ def parse_options(argv, prog=None): metavar="SHARDS", help="shards to setup on created tables (default: 1)", type="pos_int", - action="add_key") + action="add_key", + ) replication_options_group.add_option( "--replicas", dest="create_args", metavar="REPLICAS", help="replicas to setup on created tables (default: 1)", type="pos_int", - action="add_key") + action="add_key", + ) parser.add_option_group(replication_options_group) # Directory import options @@ -697,7 +806,8 @@ def parse_options(argv, prog=None): dest="directory", metavar="DIRECTORY", default=None, - help="directory to import data from") + help="directory to import data from", + ) dir_import_group.add_option( "-i", "--import", @@ -706,13 +816,15 @@ def parse_options(argv, prog=None): default=[], help="restore only the given database or table (may be specified multiple times)", action="append", - type="db_table") + type="db_table", + ) dir_import_group.add_option( "--no-secondary-indexes", 
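# A minimal sketch of the row construction in CsvSourceFile.get_line() above:
# header columns are zipped with the raw CSV values and empty fields are
# dropped rather than imported as empty strings. Sample data is assumed.
columns = ["id", "name", "number"]
raw_row = ["1", "", "42"]
row = {key: value for key, value in zip(columns, raw_row) if value != ""}
# -> {"id": "1", "number": "42"}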
dest="indexes", action="store_false", default=None, - help="do not create secondary indexes") + help="do not create secondary indexes", + ) parser.add_option_group(dir_import_group) # File import options @@ -724,11 +836,22 @@ def parse_options(argv, prog=None): metavar="FILE", default=None, help="file to import data from", - type="file") - file_import_group.add_option("--table", dest="import_table", metavar="DB.TABLE", - default=None, help="table to import the data into") - file_import_group.add_option("--fields", dest="fields", metavar="FIELD,...", default=None, - help="limit which fields to use when importing one table") + type="file", + ) + file_import_group.add_option( + "--table", + dest="import_table", + metavar="DB.TABLE", + default=None, + help="table to import the data into", + ) + file_import_group.add_option( + "--fields", + dest="fields", + metavar="FIELD,...", + default=None, + help="limit which fields to use when importing one table", + ) file_import_group.add_option( "--format", dest="format", @@ -736,16 +859,16 @@ def parse_options(argv, prog=None): default=None, help="format of the file (default: json, accepts newline delimited json)", type="choice", - choices=[ - "json", - "csv"]) + choices=["json", "csv"], + ) file_import_group.add_option( "--pkey", dest="create_args", metavar="PRIMARY_KEY", default=None, help="field to use as the primary key in the table", - action="add_key") + action="add_key", + ) parser.add_option_group(file_import_group) # CSV import options @@ -755,15 +878,22 @@ def parse_options(argv, prog=None): dest="delimiter", metavar="CHARACTER", default=None, - help="character separating fields, or '\\t' for tab") - csv_import_group.add_option("--no-header", dest="no_header", action="store_true", - default=None, help="do not read in a header of field names") + help="character separating fields, or '\\t' for tab", + ) + csv_import_group.add_option( + "--no-header", + dest="no_header", + action="store_true", + default=None, + help="do not read in a header of field names", + ) csv_import_group.add_option( "--custom-header", dest="custom_header", metavar="FIELD,...", default=None, - help="header to use (overriding file header), must be specified if --no-header") + help="header to use (overriding file header), must be specified if --no-header", + ) parser.add_option_group(csv_import_group) # JSON import options @@ -774,14 +904,16 @@ def parse_options(argv, prog=None): metavar="MAX_SIZE", default=0, help="maximum allowed size (bytes) for a single JSON document (default: 128MiB)", - type="pos_int") + type="pos_int", + ) json_options_group.add_option( "--max-nesting-depth", dest="max_nesting_depth", metavar="MAX_DEPTH", default=0, help="maximum depth of the JSON documents (default: 100)", - type="pos_int") + type="pos_int", + ) parser.add_option_group(json_options_group) options, args = parser.parse_args(argv) @@ -789,7 +921,9 @@ def parse_options(argv, prog=None): # Check validity of arguments if len(args) != 0: - raise parser.error("No positional arguments supported. Unrecognized option(s): %s" % args) + raise parser.error( + "No positional arguments supported. 
Unrecognized option(s): %s" % args + ) # - create_args if options.create_args is None: @@ -822,15 +956,23 @@ def parse_options(argv, prog=None): if options.no_header: parser.error("--no-header option is not valid when importing a directory") if options.custom_header: - parser.error("table create options are not valid when importing a directory: %s" % - ", ".join([x.lower().replace("_", " ") for x in options.custom_header.keys()])) + parser.error( + "table create options are not valid when importing a directory: %s" + % ", ".join( + [x.lower().replace("_", " ") for x in options.custom_header.keys()] + ) + ) # check valid options if not os.path.isdir(options.directory): parser.error("Directory to import does not exist: %s" % options.directory) - if options.fields and (len(options.db_tables) > 1 or options.db_tables[0].table is None): - parser.error("--fields option can only be used when importing a single table") + if options.fields and ( + len(options.db_tables) > 1 or options.db_tables[0].table is None + ): + parser.error( + "--fields option can only be used when importing a single table" + ) elif options.file: if not os.path.exists(options.file): @@ -841,13 +983,15 @@ def parse_options(argv, prog=None): # format if options.format is None: - options.format = os.path.splitext(options.file)[1].lstrip('.') + options.format = os.path.splitext(options.file)[1].lstrip(".") # import_table if options.import_table: res = utils_common._tableNameRegex.match(options.import_table) if res and res.group("table"): - options.import_table = utils_common.DbTable(res.group("db"), res.group("table")) + options.import_table = utils_common.DbTable( + res.group("db"), res.group("table") + ) else: parser.error("Invalid --table option: %s" % options.import_table) else: @@ -860,12 +1004,16 @@ def parse_options(argv, prog=None): if options.db_tables: parser.error("-i/--import can only be used when importing a directory") if options.indexes: - parser.error("--no-secondary-indexes can only be used when importing a directory") + parser.error( + "--no-secondary-indexes can only be used when importing a directory" + ) if options.format == "csv": # disallow invalid options if options.max_document_size: - parser.error("--max_document_size only affects importing JSON documents") + parser.error( + "--max_document_size only affects importing JSON documents" + ) # delimiter if options.delimiter is None: @@ -873,7 +1021,10 @@ def parse_options(argv, prog=None): elif options.delimiter == "\\t": options.delimiter = "\t" elif len(options.delimiter) != 1: - parser.error("Specify exactly one character for the --delimiter option: %s" % options.delimiter) + parser.error( + "Specify exactly one character for the --delimiter option: %s" + % options.delimiter + ) # no_header if options.no_header is None: @@ -920,10 +1071,13 @@ def parse_options(argv, prog=None): return options + # This is run for each client requested, and accepts tasks from the reader processes -def table_writer(tables, options, work_queue, error_queue, warning_queue, exit_event, timing_queue): +def table_writer( + tables, options, work_queue, error_queue, warning_queue, exit_event, timing_queue +): signal.signal(signal.SIGINT, signal.SIG_IGN) # workers should ignore these db = table = batch = None @@ -936,7 +1090,7 @@ def table_writer(tables, options, work_queue, error_queue, warning_queue, exit_e db, table, batch = work_queue.get(timeout=0.1) except Empty: continue - timing_queue.put(('writer_wait', time.time() - timePoint)) + timing_queue.put(("writer_wait", 
time.time() - timePoint)) timePoint = time.time() # shut down when appropriate @@ -950,25 +1104,24 @@ def table_writer(tables, options, work_queue, error_queue, warning_queue, exit_e # write the batch to the database try: res = options.retryQuery( - "write batch to %s.%s" % - (db, - table), + "write batch to %s.%s" % (db, table), tbl.insert( - ast.expr( - batch, - nesting_depth=MAX_NESTING_DEPTH), + ast.expr(batch, nesting_depth=MAX_NESTING_DEPTH), durability=options.durability, conflict=conflict_action, - )) + ), + ) if res["errors"] > 0: - raise RuntimeError("Error when importing into table '%s.%s': %s" % (db, table, res["first_error"])) + raise RuntimeError( + "Error when importing into table '%s.%s': %s" + % (db, table, res["first_error"]) + ) modified = res["inserted"] + res["replaced"] + res["unchanged"] if modified != len(batch): raise RuntimeError( - "The inserted/replaced/unchanged number did not match when importing into table '%s.%s': %s" % ( - db, table, res["first_error"] - ) + "The inserted/replaced/unchanged number did not match when importing into table '%s.%s': %s" + % (db, table, res["first_error"]) ) table_info.add_rows_written(modified) @@ -980,53 +1133,53 @@ def table_writer(tables, options, work_queue, error_queue, warning_queue, exit_e if table_info.primary_key not in row: raise RuntimeError( "Connection error while importing. Current row does not have the specified primary key " - "(%s), so cannot guarantee absence of duplicates" % table_info.primary_key) + "(%s), so cannot guarantee absence of duplicates" + % table_info.primary_key + ) res = None if conflict_action == "replace": res = options.retryQuery( - "write row to %s.%s" % - (db, - table), + "write row to %s.%s" % (db, table), tbl.insert( - ast.expr( - row, - nesting_depth=MAX_NESTING_DEPTH), + ast.expr(row, nesting_depth=MAX_NESTING_DEPTH), durability=options.durability, conflict=conflict_action, - ignore_write_hook=True)) + ignore_write_hook=True, + ), + ) else: existingRow = options.retryQuery( "read row from %s.%s" % (db, table), - tbl.get(row[table_info.primary_key]) + tbl.get(row[table_info.primary_key]), ) if not existingRow: res = options.retryQuery( - "write row to %s.%s" % - (db, - table), + "write row to %s.%s" % (db, table), tbl.insert( - ast.expr( - row, - nesting_depth=MAX_NESTING_DEPTH), + ast.expr(row, nesting_depth=MAX_NESTING_DEPTH), durability=options.durability, conflict=conflict_action, - ignore_write_hook=True)) + ignore_write_hook=True, + ), + ) elif existingRow != row: raise RuntimeError( - "Duplicate primary key `%s`:\n%s\n%s" % - (table_info.primary_key, str(row), str(existingRow))) + "Duplicate primary key `%s`:\n%s\n%s" + % (table_info.primary_key, str(row), str(existingRow)) + ) if res["errors"] > 0: - raise RuntimeError("Error when importing into table '%s.%s': %s" % ( - db, table, res["first_error"])) + raise RuntimeError( + "Error when importing into table '%s.%s': %s" + % (db, table, res["first_error"]) + ) if res["inserted"] + res["replaced"] + res["unchanged"] != 1: raise RuntimeError( - "The inserted/replaced/unchanged number was not 1 when inserting on '%s.%s': %s" % ( - db, table, res - ) + "The inserted/replaced/unchanged number was not 1 when inserting on '%s.%s': %s" + % (db, table, res) ) table_info.add_rows_written(1) - timing_queue.put(('writer_work', time.time() - timePoint)) + timing_queue.put(("writer_work", time.time() - timePoint)) timePoint = time.time() except Exception as e: @@ -1063,9 +1216,15 @@ def update_progress(tables, debug, exit_event, sleep=0.2): if 
complete != lastComplete: timeDelta = readWrites[-1][0] - readWrites[0][0] if debug and len(readWrites) > 1 and timeDelta > 0: - readRate = max((readWrites[-1][1] - readWrites[0][1]) / timeDelta, 0) - writeRate = max((readWrites[-1][2] - readWrites[0][2]) / timeDelta, 0) - utils_common.print_progress(complete, indent=2, read=readRate, write=writeRate) + readRate = max( + (readWrites[-1][1] - readWrites[0][1]) / timeDelta, 0 + ) + writeRate = max( + (readWrites[-1][2] - readWrites[0][2]) / timeDelta, 0 + ) + utils_common.print_progress( + complete, indent=2, read=readRate, write=writeRate + ) lastComplete = complete time.sleep(sleep) except KeyboardInterrupt: @@ -1135,18 +1294,28 @@ def drain_queues(): # create missing dbs needed_dbs = set([x.db for x in sources]) if "rethinkdb" in needed_dbs: - raise RuntimeError("Error: Cannot import tables into the system database: 'rethinkdb'") + raise RuntimeError( + "Error: Cannot import tables into the system database: 'rethinkdb'" + ) options.retryQuery( - "ensure dbs: %s" % - ", ".join(needed_dbs), - ast.expr(needed_dbs).set_difference( - query.db_list()).for_each( - query.db_create( - query.row))) + "ensure dbs: %s" % ", ".join(needed_dbs), + ast.expr(needed_dbs) + .set_difference(query.db_list()) + .for_each(query.db_create(query.row)), + ) # check for existing tables, or if --force is enabled ones with mis-matched primary keys - existing_tables = dict([((x["db"], x["name"]), x["primary_key"]) for x in options.retryQuery( - "list tables", query.db("rethinkdb").table("table_config").pluck(["db", "name", "primary_key"]))]) + existing_tables = dict( + [ + ((x["db"], x["name"]), x["primary_key"]) + for x in options.retryQuery( + "list tables", + query.db("rethinkdb") + .table("table_config") + .pluck(["db", "name", "primary_key"]), + ) + ] + ) already_exist = [] for source in sources: if (source.db, source.table) in existing_tables: @@ -1156,20 +1325,26 @@ def drain_queues(): source.primary_key = existing_tables[(source.db, source.table)] elif source.primary_key != existing_tables[(source.db, source.table)]: raise RuntimeError( - "Error: Table '%s.%s' already exists with a different primary key: %s (expected: %s)" % ( - source.db, source.table, existing_tables[(source.db, source.table)], source.primary_key + "Error: Table '%s.%s' already exists with a different primary key: %s (expected: %s)" + % ( + source.db, + source.table, + existing_tables[(source.db, source.table)], + source.primary_key, ) ) if len(already_exist) == 1: raise RuntimeError( - "Error: Table '%s' already exists, run with --force to import into the existing table" % - already_exist[0]) + "Error: Table '%s' already exists, run with --force to import into the existing table" + % already_exist[0] + ) elif len(already_exist) > 1: already_exist.sort() raise RuntimeError( - "Error: The following tables already exist, run with --force to import into the existing tables:\n %s" % - "\n ".join(already_exist)) + "Error: The following tables already exist, run with --force to import into the existing tables:\n %s" + % "\n ".join(already_exist) + ) # - start the import @@ -1179,7 +1354,7 @@ def drain_queues(): progress_bar = multiprocessing.Process( target=update_progress, name="progress bar", - args=(sources, options.debug, exit_event, progress_bar_sleep) + args=(sources, options.debug, exit_event, progress_bar_sleep), ) progress_bar.start() pools.append([progress_bar]) @@ -1190,8 +1365,7 @@ def drain_queues(): for i in range(options.clients): writer = multiprocessing.Process( 
target=table_writer, - name="table writer %d" % - i, + name="table writer %d" % i, kwargs={ "tables": tables, "options": options, @@ -1199,7 +1373,9 @@ def drain_queues(): "error_queue": error_queue, "warning_queue": warning_queue, "timing_queue": timing_queue, - "exit_event": exit_event}) + "exit_event": exit_event, + }, + ) writers.append(writer) writer.start() @@ -1214,9 +1390,7 @@ def drain_queues(): table = next(file_iter) reader = multiprocessing.Process( target=table.read_to_queue, - name="table reader %s.%s" % - (table.db, - table.table), + name="table reader %s.%s" % (table.db, table.table), kwargs={ "fields": options.fields, "batch_size": options.batch_size, @@ -1224,7 +1398,9 @@ def drain_queues(): "error_queue": error_queue, "warning_queue": warning_queue, "timing_queue": timing_queue, - "exit_event": exit_event}) + "exit_event": exit_event, + }, + ) readers.append(reader) reader.start() @@ -1236,7 +1412,7 @@ def drain_queues(): if not reader.is_alive(): readers.remove(reader) if len(readers) == options.clients: - time.sleep(.05) + time.sleep(0.05) except StopIteration: pass # ran out of new tables @@ -1256,7 +1432,7 @@ def drain_queues(): # watch the readers for reader in readers[:]: try: - reader.join(.1) + reader.join(0.1) except Exception as exc: default_logger.exception(exc) if not reader.is_alive(): @@ -1297,20 +1473,26 @@ def drain_queues(): utils_common.print_progress(1.0, indent=2) # advance past the progress bar - print('') + print("") # report statistics def plural(num, text): return "%d %s%s" % (num, text, "" if num == 1 else "s") - print(" %s imported to %s in %.2f secs" % (plural(sum(x.rows_written for x in sources), "row"), - plural(len(sources), "table"), time.time() - start_time)) + print( + " %s imported to %s in %.2f secs" + % ( + plural(sum(x.rows_written for x in sources), "row"), + plural(len(sources), "table"), + time.time() - start_time, + ) + ) # report debug statistics if options.debug: - print('Debug timing:') + print("Debug timing:") for key, value in sorted(timing_sums.items(), key=lambda x: x[0]): - print(' %s: %.2f' % (key, value)) + print(" %s: %.2f" % (key, value)) finally: signal.signal(signal.SIGINT, signal.SIG_DFL) @@ -1326,7 +1508,9 @@ def plural(num, text): for warning in warnings: print("%s" % warning[1], file=sys.stderr) if options.debug: - print("%s traceback: %s" % (warning[0].__name__, warning[2]), file=sys.stderr) + print( + "%s traceback: %s" % (warning[0].__name__, warning[2]), file=sys.stderr + ) if len(warning) == 4: print("In file: %s" % warning[3], file=sys.stderr) @@ -1339,12 +1523,11 @@ def plural(num, text): def parse_sources(options, files_ignored=None): - def parse_info_file(path): primary_key = None indexes = [] write_hook = None - with open(path, 'r') as info_file: + with open(path, "r") as info_file: metadata = json.load(info_file) if "primary_key" in metadata: primary_key = metadata["primary_key"] @@ -1354,13 +1537,15 @@ def parse_info_file(path): write_hook = metadata["write_hook"] return primary_key, indexes, write_hook - has_write_hooks = utils_common.check_minimum_version(options, '2.3.7', False) + has_write_hooks = utils_common.check_minimum_version(options, "2.3.7", False) sources = set() if files_ignored is None: files_ignored = [] if options.directory and options.file: - raise RuntimeError("Error: Both --directory and --file cannot be specified together") + raise RuntimeError( + "Error: Both --directory and --file cannot be specified together" + ) elif options.file: db, table = options.import_table path, ext = 
os.path.splitext(options.file) @@ -1370,18 +1555,24 @@ def parse_info_file(path): elif ext == ".csv": table_type = CsvSourceFile table_type_options = { - 'no_header_row': options.no_header, - 'custom_header': options.custom_header + "no_header_row": options.no_header, + "custom_header": options.custom_header, } else: raise Exception("The table type is not recognised: %s" % ext) # - parse the info file if it exists - primary_key = options.create_args.get('primary_key', None) if options.create_args else None + primary_key = ( + options.create_args.get("primary_key", None) + if options.create_args + else None + ) indexes = [] write_hook = None info_path = path + ".info" - if (primary_key is None or options.indexes is not False) and os.path.isfile(info_path): + if (primary_key is None or options.indexes is not False) and os.path.isfile( + info_path + ): info_primary_key, info_indexes, info_write_hook = parse_info_file(info_path) if primary_key is None: primary_key = info_primary_key @@ -1390,17 +1581,18 @@ def parse_info_file(path): if write_hook is None: write_hook = info_write_hook if write_hook and not has_write_hooks: - raise Exception('this RDB version doesn\'t support write-hooks') + raise Exception("this RDB version doesn't support write-hooks") sources.add( table_type( source=options.file, - db=db, table=table, + db=db, + table=table, query_runner=options.retryQuery, primary_key=primary_key, indexes=indexes, write_hook=write_hook, - source_options=table_type_options + source_options=table_type_options, ) ) elif options.directory: @@ -1454,9 +1646,13 @@ def parse_info_file(path): if not os.path.isfile(info_path): files_ignored.append(os.path.join(root, filename)) else: - primary_key, indexes, write_hook = parse_info_file(info_path) + primary_key, indexes, write_hook = parse_info_file( + info_path + ) if write_hook and not has_write_hooks: - raise Exception('RDB versions below doesn\'t support write-hooks') + raise Exception( + "RDB versions below doesn't support write-hooks" + ) table_type = None if ext == ".json": @@ -1464,29 +1660,42 @@ def parse_info_file(path): elif ext == ".csv": table_type = CsvSourceFile else: - raise Exception("The table type is not recognised: %s" % ext) + raise Exception( + "The table type is not recognised: %s" % ext + ) source = table_type( source=path, query_runner=options.retryQuery, - db=db, table=table, + db=db, + table=table, primary_key=primary_key, indexes=indexes, - write_hook=write_hook + write_hook=write_hook, ) # ensure we don't have a duplicate if table in sources: raise RuntimeError( - "Error: Duplicate db.table found in directory tree: %s.%s" % - (source.db, source.table)) + "Error: Duplicate db.table found in directory tree: %s.%s" + % (source.db, source.table) + ) sources.add(source) # Warn the user about the files that were ignored if len(files_ignored) > 0: - print("Unexpected files found in the specified directory. Importing a directory expects", file=sys.stderr) - print(" a directory from `rethinkdb export`. If you want to import individual tables", file=sys.stderr) - print(" import them as single files. The following files were ignored:", file=sys.stderr) + print( + "Unexpected files found in the specified directory. Importing a directory expects", + file=sys.stderr, + ) + print( + " a directory from `rethinkdb export`. If you want to import individual tables", + file=sys.stderr, + ) + print( + " import them as single files. 
The following files were ignored:", + file=sys.stderr, + ) for ignored_file in files_ignored: print("%s" % str(ignored_file), file=sys.stderr) else: diff --git a/rethinkdb/_index_rebuild.py b/rethinkdb/_index_rebuild.py index 77c36dd2..b12997b1 100755 --- a/rethinkdb/_index_rebuild.py +++ b/rethinkdb/_index_rebuild.py @@ -30,9 +30,11 @@ from rethinkdb import query, utils_common -usage = "rethinkdb index-rebuild [-c HOST:PORT] [-n NUM] [-r (DB | DB.TABLE)] [--tls-cert FILENAME] [-p] " \ - "[--password-file FILENAME]..." -help_epilog = ''' +usage = ( + "rethinkdb index-rebuild [-c HOST:PORT] [-n NUM] [-r (DB | DB.TABLE)] [--tls-cert FILENAME] [-p] " + "[--password-file FILENAME]..." +) +help_epilog = """ FILE: the archive file to restore data from EXAMPLES: @@ -43,14 +45,16 @@ rethinkdb index-rebuild -r test -r production.users -n 5 rebuild all outdated secondary indexes from a local cluster on all tables in the 'test' database as well as the 'production.users' table, five at a time -''' +""" # Prefix used for indexes that are being rebuilt -TMP_INDEX_PREFIX = '$reql_temp_index$_' +TMP_INDEX_PREFIX = "$reql_temp_index$_" def parse_options(argv, prog=None): - parser = utils_common.CommonOptionsParser(usage=usage, epilog=help_epilog, prog=prog) + parser = utils_common.CommonOptionsParser( + usage=usage, epilog=help_epilog, prog=prog + ) parser.add_option( "-r", @@ -60,21 +64,32 @@ def parse_options(argv, prog=None): default=[], help="databases or tables to rebuild indexes on (default: all, may be specified multiple times)", action="append", - type="db_table") + type="db_table", + ) parser.add_option( "-n", dest="concurrent", metavar="NUM", default=1, help="concurrent indexes to rebuild (default: 1)", - type="pos_int") - parser.add_option("--force", dest="force", action="store_true", default=False, help="rebuild non-outdated indexes") + type="pos_int", + ) + parser.add_option( + "--force", + dest="force", + action="store_true", + default=False, + help="rebuild non-outdated indexes", + ) options, args = parser.parse_args(argv) # Check validity of arguments if len(args) != 0: - parser.error("Error: No positional arguments supported. Unrecognized option '%s'" % args[0]) + parser.error( + "Error: No positional arguments supported. 
Unrecognized option '%s'" + % args[0] + ) return options @@ -84,44 +99,58 @@ def rebuild_indexes(options): # flesh out options.db_table if not options.db_table: options.db_table = [ - utils_common.DbTable(x['db'], x['name']) for x in - options.retryQuery('all tables', query.db('rethinkdb').table('table_config').pluck(['db', 'name'])) + utils_common.DbTable(x["db"], x["name"]) + for x in options.retryQuery( + "all tables", + query.db("rethinkdb").table("table_config").pluck(["db", "name"]), + ) ] else: for db_table in options.db_table[:]: # work from a copy if not db_table[1]: options.db_table += [ - utils_common.DbTable(db_table[0], x) for x in options.retryQuery('table list of %s' % db_table[0], - query.db(db_table[0]).table_list()) + utils_common.DbTable(db_table[0], x) + for x in options.retryQuery( + "table list of %s" % db_table[0], + query.db(db_table[0]).table_list(), + ) ] del options.db_table[db_table] # wipe out any indexes with the TMP_INDEX_PREFIX for db, table in options.db_table: - for index in options.retryQuery('list indexes on %s.%s' % (db, table), query.db(db).table(table).index_list()): + for index in options.retryQuery( + "list indexes on %s.%s" % (db, table), + query.db(db).table(table).index_list(), + ): if index.startswith(TMP_INDEX_PREFIX): options.retryQuery( - 'drop index: %s.%s:%s' % - (db, - table, - index), - query.db( - index['db']).table( - index['table']).index_drop( - index['name'])) + "drop index: %s.%s:%s" % (db, table, index), + query.db(index["db"]) + .table(index["table"]) + .index_drop(index["name"]), + ) # get the list of indexes to rebuild indexes_to_build = [] for db, table in options.db_table: indexes = None if not options.force: - indexes = options.retryQuery('get outdated indexes from %s.%s' % (db, table), query.db( - db).table(table).index_status().filter({'outdated': True}).get_field('index')) + indexes = options.retryQuery( + "get outdated indexes from %s.%s" % (db, table), + query.db(db) + .table(table) + .index_status() + .filter({"outdated": True}) + .get_field("index"), + ) else: - indexes = options.retryQuery('get all indexes from %s.%s' % - (db, table), query.db(db).table(table).index_status().get_field('index')) + indexes = options.retryQuery( + "get all indexes from %s.%s" % (db, table), + query.db(db).table(table).index_status().get_field("index"), + ) for index in indexes: - indexes_to_build.append({'db': db, 'table': table, 'name': index}) + indexes_to_build.append({"db": db, "table": table, "name": index}) # rebuild selected indexes @@ -132,37 +161,53 @@ def rebuild_indexes(options): indexes_in_progress = [] if not options.quiet: - print("Rebuilding %d index%s: %s" % (total_indexes, 'es' if total_indexes > 1 else '', - ", ".join(["`%(db)s.%(table)s:%(name)s`" % i for i in indexes_to_build]))) + print( + "Rebuilding %d index%s: %s" + % ( + total_indexes, + "es" if total_indexes > 1 else "", + ", ".join( + ["`%(db)s.%(table)s:%(name)s`" % i for i in indexes_to_build] + ), + ) + ) while len(indexes_to_build) > 0 or len(indexes_in_progress) > 0: # Make sure we're running the right number of concurrent index rebuilds - while len(indexes_to_build) > 0 and len(indexes_in_progress) < options.concurrent: + while ( + len(indexes_to_build) > 0 and len(indexes_in_progress) < options.concurrent + ): index = indexes_to_build.pop() indexes_in_progress.append(index) - index['temp_name'] = TMP_INDEX_PREFIX + index['name'] - index['progress'] = 0 - index['ready'] = False + index["temp_name"] = TMP_INDEX_PREFIX + index["name"] + index["progress"] = 
0 + index["ready"] = False existing_indexes = dict( - (x['index'], - x['function']) for x in options.retryQuery( - 'existing indexes', - query.db( - index['db']).table( - index['table']).index_status().pluck( - 'index', - 'function'))) - - if index['name'] not in existing_indexes: - raise AssertionError('{index_name} is not part of existing indexes {indexes}'.format( - index_name=index['name'], - indexes=', '.join(existing_indexes) - )) - - if index['temp_name'] not in existing_indexes: - options.retryQuery('create temp index: %(db)s.%(table)s:%(name)s' % index, query.db(index['db']).table( - index['table']).index_create(index['temp_name'], existing_indexes[index['name']])) + (x["index"], x["function"]) + for x in options.retryQuery( + "existing indexes", + query.db(index["db"]) + .table(index["table"]) + .index_status() + .pluck("index", "function"), + ) + ) + + if index["name"] not in existing_indexes: + raise AssertionError( + "{index_name} is not part of existing indexes {indexes}".format( + index_name=index["name"], indexes=", ".join(existing_indexes) + ) + ) + + if index["temp_name"] not in existing_indexes: + options.retryQuery( + "create temp index: %(db)s.%(table)s:%(name)s" % index, + query.db(index["db"]) + .table(index["table"]) + .index_create(index["temp_name"], existing_indexes[index["name"]]), + ) # Report progress highest_progress = max(highest_progress, progress_ratio) @@ -174,28 +219,33 @@ def rebuild_indexes(options): for index in indexes_in_progress: status = options.retryQuery( "progress `%(db)s.%(table)s` index `%(name)s`" % index, - query.db(index['db']).table(index['table']).index_status(index['temp_name']).nth(0) + query.db(index["db"]) + .table(index["table"]) + .index_status(index["temp_name"]) + .nth(0), ) - if status['ready']: - index['ready'] = True + if status["ready"]: + index["ready"] = True options.retryQuery( - "rename `%(db)s.%(table)s` index `%(name)s`" % - index, - query.db( - index['db']).table( - index['table']).index_rename( - index['temp_name'], - index['name'], - overwrite=True)) + "rename `%(db)s.%(table)s` index `%(name)s`" % index, + query.db(index["db"]) + .table(index["table"]) + .index_rename(index["temp_name"], index["name"], overwrite=True), + ) else: - progress_ratio += status.get('progress', 0) / total_indexes + progress_ratio += status.get("progress", 0) / total_indexes - indexes_in_progress = [index for index in indexes_in_progress if not index['ready']] - indexes_completed = total_indexes - len(indexes_to_build) - len(indexes_in_progress) + indexes_in_progress = [ + index for index in indexes_in_progress if not index["ready"] + ] + indexes_completed = ( + total_indexes - len(indexes_to_build) - len(indexes_in_progress) + ) progress_ratio += float(indexes_completed) / total_indexes - if len(indexes_in_progress) == options.concurrent or \ - (len(indexes_in_progress) > 0 and len(indexes_to_build) == 0): + if len(indexes_in_progress) == options.concurrent or ( + len(indexes_in_progress) > 0 and len(indexes_to_build) == 0 + ): # Short sleep to keep from killing the CPU time.sleep(0.1) diff --git a/rethinkdb/_restore.py b/rethinkdb/_restore.py index f6b49beb..23633f94 100755 --- a/rethinkdb/_restore.py +++ b/rethinkdb/_restore.py @@ -18,7 +18,7 @@ # Copyright 2010-2016 RethinkDB, all rights reserved. 
-'''`rethinkdb restore` loads data into a RethinkDB cluster from an archive''' +"""`rethinkdb restore` loads data into a RethinkDB cluster from an archive""" from __future__ import print_function @@ -35,9 +35,11 @@ from rethinkdb import _import, utils_common -usage = "rethinkdb restore FILE [-c HOST:PORT] [--tls-cert FILENAME] [-p] [--password-file FILENAME] [--clients NUM] " \ - "[--shards NUM_SHARDS] [--replicas NUM_REPLICAS] [--force] [-i (DB | DB.TABLE)]..." -help_epilog = ''' +usage = ( + "rethinkdb restore FILE [-c HOST:PORT] [--tls-cert FILENAME] [-p] [--password-file FILENAME] [--clients NUM] " + "[--shards NUM_SHARDS] [--replicas NUM_REPLICAS] [--force] [-i (DB | DB.TABLE)]..." +) +help_epilog = """ FILE: the archive file to restore data from; if FILE is -, use standard input (note that @@ -60,11 +62,13 @@ rethinkdb restore rdb_dump.tar.gz --clients 4 --force Import data to a local cluster from the named archive file using only 4 client connections and overwriting any existing rows with the same primary key. -''' +""" def parse_options(argv, prog=None): - parser = utils_common.CommonOptionsParser(usage=usage, epilog=help_epilog, prog=prog) + parser = utils_common.CommonOptionsParser( + usage=usage, epilog=help_epilog, prog=prog + ) parser.add_option( "-i", @@ -74,48 +78,80 @@ def parse_options(argv, prog=None): default=[], help="limit restore to the given database or table (may be specified multiple times)", action="append", - type="db_table") - - parser.add_option("--temp-dir", dest="temp_dir", metavar="DIR", default=None, - help="directory to use for intermediary results") - parser.add_option("--clients", dest="clients", metavar="CLIENTS", default=8, - help="client connections to use (default: 8)", type="pos_int") - parser.add_option("--hard-durability", dest="durability", action="store_const", default="soft", - help="use hard durability writes (slower, uses less memory)", const="hard") - parser.add_option("--force", dest="force", action="store_true", default=False, - help="import data even if a table already exists") - parser.add_option("--no-secondary-indexes", dest="indexes", action="store_false", - default=None, help="do not create secondary indexes for the restored tables") + type="db_table", + ) + + parser.add_option( + "--temp-dir", + dest="temp_dir", + metavar="DIR", + default=None, + help="directory to use for intermediary results", + ) + parser.add_option( + "--clients", + dest="clients", + metavar="CLIENTS", + default=8, + help="client connections to use (default: 8)", + type="pos_int", + ) + parser.add_option( + "--hard-durability", + dest="durability", + action="store_const", + default="soft", + help="use hard durability writes (slower, uses less memory)", + const="hard", + ) + parser.add_option( + "--force", + dest="force", + action="store_true", + default=False, + help="import data even if a table already exists", + ) + parser.add_option( + "--no-secondary-indexes", + dest="indexes", + action="store_false", + default=None, + help="do not create secondary indexes for the restored tables", + ) parser.add_option( "--writers-per-table", dest="writers", default=multiprocessing.cpu_count(), help=optparse.SUPPRESS_HELP, - type="pos_int") + type="pos_int", + ) parser.add_option( "--batch-size", dest="batch_size", default=utils_common.default_batch_size, help=optparse.SUPPRESS_HELP, - type="pos_int") + type="pos_int", + ) # Replication settings - replication_options_group = optparse.OptionGroup(parser, 'Replication Options') + replication_options_group = 
optparse.OptionGroup(parser, "Replication Options") replication_options_group.add_option( "--shards", dest="create_args", metavar="SHARDS", help="shards to setup on created tables (default: 1)", type="pos_int", - action="add_key") + action="add_key", + ) replication_options_group.add_option( "--replicas", dest="create_args", metavar="REPLICAS", help="replicas to setup on created tables (default: 1)", type="pos_int", - action="add_key") + action="add_key", + ) parser.add_option_group(replication_options_group) options, args = parser.parse_args(argv) @@ -124,11 +160,13 @@ def parse_options(argv, prog=None): # - archive if len(args) == 0: - parser.error("Archive to import not specified. Provide an archive file created by rethinkdb-dump.") + parser.error( + "Archive to import not specified. Provide an archive file created by rethinkdb-dump." + ) elif len(args) != 1: parser.error("Only one positional argument supported") options.in_file = args[0] - if options.in_file == '-': + if options.in_file == "-": options.in_file = sys.stdin else: if not os.path.isfile(options.in_file): @@ -138,7 +176,10 @@ def parse_options(argv, prog=None): # - temp_dir if options.temp_dir: if not os.path.isdir(options.temp_dir): - parser.error("Temporary directory doesn't exist or is not a directory: %s" % options.temp_dir) + parser.error( + "Temporary directory doesn't exist or is not a directory: %s" + % options.temp_dir + ) if not os.access(options["temp_dir"], os.W_OK): parser.error("Temporary directory inaccessible: %s" % options.temp_dir) @@ -152,7 +193,7 @@ def parse_options(argv, prog=None): def do_unzip(temp_dir, options): - '''extract the tarfile to the filesystem''' + """extract the tarfile to the filesystem""" tables_to_export = set(options.db_tables) top_level = None @@ -161,7 +202,7 @@ def do_unzip(temp_dir, options): archive = None tarfile_options = { "mode": "r|*", - "fileobj" if hasattr(options.in_file, "read") else "name": options.in_file + "fileobj" if hasattr(options.in_file, "read") else "name": options.in_file, } try: archive = tarfile.open(**tarfile_options) @@ -171,7 +212,9 @@ def do_unzip(temp_dir, options): continue # skip everything but files # normalize the path - relpath = os.path.relpath(os.path.realpath(tarinfo.name.strip().lstrip(os.sep))) + relpath = os.path.relpath( + os.path.realpath(tarinfo.name.strip().lstrip(os.sep)) + ) # skip things that try to jump out of the folder if relpath.startswith(os.path.pardir): @@ -187,18 +230,24 @@ def do_unzip(temp_dir, options): try: top, db, file_name = relpath.split(os.sep) except ValueError: - raise RuntimeError("Error: Archive file has an unexpected directory structure: %s" % tarinfo.name) + raise RuntimeError( + "Error: Archive file has an unexpected directory structure: %s" + % tarinfo.name + ) if not top_level: top_level = top elif top != top_level: raise RuntimeError( - "Error: Archive file has an unexpected directory structure (%s vs %s)" % - (top, top_level)) + "Error: Archive file has an unexpected directory structure (%s vs %s)" + % (top, top_level) + ) # filter out tables we are not looking for table = os.path.splitext(file_name) - if tables_to_export and not ((db, table) in tables_to_export or (db, None) in tables_to_export): + if tables_to_export and not ( + (db, table) in tables_to_export or (db, None) in tables_to_export + ): continue # skip without comment # write the file out @@ -208,7 +257,7 @@ def do_unzip(temp_dir, options): if not os.path.exists(os.path.dirname(dest_path)): os.makedirs(os.path.dirname(dest_path)) - with 
open(dest_path, 'wb') as dest: + with open(dest_path, "wb") as dest: source = archive.extractfile(tarinfo) chunk = True while chunk: @@ -217,7 +266,11 @@ def do_unzip(temp_dir, options): source.close() if not os.path.isfile(dest_path): - raise AssertionError('Was not able to write {destination_path}'.format(destination_path=dest_path)) + raise AssertionError( + "Was not able to write {destination_path}".format( + destination_path=dest_path + ) + ) finally: if archive: @@ -264,11 +317,13 @@ def do_restore(options): if options.debug: traceback.print_exc() if str(ex) == "Warnings occurred during import": - raise RuntimeError("Warning: import did not create some secondary indexes.") + raise RuntimeError( + "Warning: import did not create some secondary indexes." + ) else: error_string = str(ex) - if error_string.startswith('Error: '): - error_string = error_string[len('Error: '):] + if error_string.startswith("Error: "): + error_string = error_string[len("Error: ") :] raise RuntimeError("Error: import failed: %s" % error_string) # 'Done' message will be printed by the import script finally: diff --git a/rethinkdb/ast.py b/rethinkdb/ast.py index 34fa40bb..3b9fddc6 100644 --- a/rethinkdb/ast.py +++ b/rethinkdb/ast.py @@ -15,7 +15,7 @@ # This file incorporates work covered by the following copyright: # Copyright 2010-2016 RethinkDB, all rights reserved. -__all__ = ['expr', 'RqlQuery', 'ReQLEncoder', 'ReQLDecoder', 'Repl'] +__all__ = ["expr", "RqlQuery", "ReQLEncoder", "ReQLDecoder", "Repl"] import base64 @@ -51,7 +51,7 @@ class Repl(object): @classmethod def get(cls): - if 'repl' in cls.thread_data.__dict__: + if "repl" in cls.thread_data.__dict__: return cls.thread_data.repl else: return None @@ -63,18 +63,19 @@ def set(cls, conn): @classmethod def clear(cls): - if 'repl' in cls.thread_data.__dict__: + if "repl" in cls.thread_data.__dict__: del cls.thread_data.repl cls.repl_active = False + # This is both an external function and one used extensively # internally to convert coerce python values to RQL types def expr(val, nesting_depth=20): - ''' + """ Convert a Python primitive into a RQL primitive value - ''' + """ if not isinstance(nesting_depth, int): raise ReqlDriverCompileError("Second argument to `r.expr` must be a number.") @@ -86,14 +87,17 @@ def expr(val, nesting_depth=20): elif isinstance(val, collections.Callable): return Func(val) elif isinstance(val, (datetime.datetime, datetime.date)): - if not hasattr(val, 'tzinfo') or not val.tzinfo: - raise ReqlDriverCompileError("""Cannot convert %s to ReQL time object + if not hasattr(val, "tzinfo") or not val.tzinfo: + raise ReqlDriverCompileError( + """Cannot convert %s to ReQL time object without timezone information. You can add timezone information with the third party module \"pytz\" or by constructing ReQL compatible timezone values with r.make_timezone(\"[+-]HH:MM\"). Alternatively, use one of ReQL's bultin time constructors, r.now, r.time, or r.iso8601. - """ % (type(val).__name__)) + """ + % (type(val).__name__) + ) return ISO8601(val.isoformat()) elif isinstance(val, RqlBinary): return Binary(val) @@ -135,7 +139,9 @@ def run(self, c=None, **global_optargs): "`repl()` on another thread, but not this one." ) else: - raise ReqlDriverError("RqlQuery.run must be given a connection to run on.") + raise ReqlDriverError( + "RqlQuery.run must be given a connection to run on." 
+ ) return c._start(self, **global_optargs) @@ -386,11 +392,15 @@ def set_difference(self, *args): def __getitem__(self, index): if isinstance(index, slice): if index.stop: - return Slice(self, index.start or 0, index.stop, - bracket_operator=True) + return Slice(self, index.start or 0, index.stop, bracket_operator=True) else: - return Slice(self, index.start or 0, -1, - right_bound='closed', bracket_operator=True) + return Slice( + self, + index.start or 0, + -1, + right_bound="closed", + bracket_operator=True, + ) else: return Bracket(self, index, bracket_operator=True) @@ -398,7 +408,8 @@ def __iter__(*args, **kwargs): raise ReqlDriverError( "__iter__ called on an RqlQuery object.\n" "To iterate over the results of a query, call run first.\n" - "To iterate inside a query, use map or for_each.") + "To iterate inside a query, use map or for_each." + ) def get_field(self, *args): return GetField(self, *args) @@ -457,7 +468,7 @@ def max(self, *args, **kwargs): def map(self, *args): if len(args) > 0: # `func_wrap` only the last argument - return Map(self, *(args[:-1] + (func_wrap(args[-1]), ))) + return Map(self, *(args[:-1] + (func_wrap(args[-1]),))) else: return Map(self) @@ -470,7 +481,7 @@ def fold(self, *args, **kwargs): kwfuncargs = {} for arg_name in kwargs: kwfuncargs[arg_name] = func_wrap(kwargs[arg_name]) - return Fold(self, *(args[:-1] + (func_wrap(args[-1]), )), **kwfuncargs) + return Fold(self, *(args[:-1] + (func_wrap(args[-1]),)), **kwfuncargs) else: return Fold(self) @@ -481,8 +492,7 @@ def concat_map(self, *args): return ConcatMap(self, *[func_wrap(arg) for arg in args]) def order_by(self, *args, **kwargs): - args = [arg if isinstance(arg, (Asc, Desc)) else func_wrap(arg) - for arg in args] + args = [arg if isinstance(arg, (Asc, Desc)) else func_wrap(arg) for arg in args] return OrderBy(self, *args, **kwargs) def between(self, *args, **kwargs): @@ -623,22 +633,24 @@ def set_infix(self): self.infix = True def compose(self, args, optargs): - t_args = [T('r.expr(', args[i], ')') - if needs_wrap(self._args[i]) else args[i] - for i in xrange(len(args))] + t_args = [ + T("r.expr(", args[i], ")") if needs_wrap(self._args[i]) else args[i] + for i in xrange(len(args)) + ] if self.infix: - return T('(', T(*t_args, intsp=[' ', self.statement_infix, ' ']), ')') + return T("(", T(*t_args, intsp=[" ", self.statement_infix, " "]), ")") else: - return T('r.', self.statement, '(', T(*t_args, intsp=', '), ')') + return T("r.", self.statement, "(", T(*t_args, intsp=", "), ")") class RqlBiOperQuery(RqlQuery): def compose(self, args, optargs): - t_args = [T('r.expr(', args[i], ')') - if needs_wrap(self._args[i]) else args[i] - for i in xrange(len(args))] - return T('(', T(*t_args, intsp=[' ', self.statement, ' ']), ')') + t_args = [ + T("r.expr(", args[i], ")") if needs_wrap(self._args[i]) else args[i] + for i in xrange(len(args)) + ] + return T("(", T(*t_args, intsp=[" ", self.statement, " "]), ")") class RqlBiCompareOperQuery(RqlBiOperQuery): @@ -654,40 +666,41 @@ def __init__(self, *args, **optargs): "This is almost always a precedence error.\n" "Note that `a < b | b < c` <==> `a < (b | b) < c`.\n" "If you really want this behavior, use `.or_` or " - "`.and_` instead.") - raise ReqlDriverCompileError(err % - (self.statement, - QueryPrinter(self).print_query())) + "`.and_` instead." 
+ ) + raise ReqlDriverCompileError( + err % (self.statement, QueryPrinter(self).print_query()) + ) except AttributeError: pass # No infix attribute, so not possible to be an infix bool operator class RqlTopLevelQuery(RqlQuery): def compose(self, args, optargs): - args.extend([T(key, '=', value) for key, value in dict_items(optargs)]) - return T('r.', self.statement, '(', T(*(args), intsp=', '), ')') + args.extend([T(key, "=", value) for key, value in dict_items(optargs)]) + return T("r.", self.statement, "(", T(*(args), intsp=", "), ")") class RqlMethodQuery(RqlQuery): def compose(self, args, optargs): if len(args) == 0: - return T('r.', self.statement, '()') + return T("r.", self.statement, "()") if needs_wrap(self._args[0]): - args[0] = T('r.expr(', args[0], ')') + args[0] = T("r.expr(", args[0], ")") restargs = args[1:] - restargs.extend([T(k, '=', v) for k, v in dict_items(optargs)]) - restargs = T(*restargs, intsp=', ') + restargs.extend([T(k, "=", v) for k, v in dict_items(optargs)]) + restargs = T(*restargs, intsp=", ") - return T(args[0], '.', self.statement, '(', restargs, ')') + return T(args[0], ".", self.statement, "(", restargs, ")") class RqlBracketQuery(RqlMethodQuery): def __init__(self, *args, **optargs): - if 'bracket_operator' in optargs: - self.bracket_operator = optargs['bracket_operator'] - del optargs['bracket_operator'] + if "bracket_operator" in optargs: + self.bracket_operator = optargs["bracket_operator"] + del optargs["bracket_operator"] else: self.bracket_operator = False @@ -696,15 +709,15 @@ def __init__(self, *args, **optargs): def compose(self, args, optargs): if self.bracket_operator: if needs_wrap(self._args[0]): - args[0] = T('r.expr(', args[0], ')') - return T(args[0], '[', T(*args[1:], intsp=[',']), ']') + args[0] = T("r.expr(", args[0], ")") + return T(args[0], "[", T(*args[1:], intsp=[","]), "]") else: return RqlMethodQuery.compose(self, args, optargs) class RqlTzinfo(datetime.tzinfo): def __init__(self, offsetstr): - hours, minutes = map(int, offsetstr.split(':')) + hours, minutes = map(int, offsetstr.split(":")) self.offsetstr = offsetstr self.delta = datetime.timedelta(hours=hours, minutes=minutes) @@ -738,19 +751,24 @@ def recursively_make_hashable(obj): if isinstance(obj, list): return tuple([recursively_make_hashable(i) for i in obj]) elif isinstance(obj, dict): - return frozenset([(k, recursively_make_hashable(v)) - for k, v in dict_items(obj)]) + return frozenset( + [(k, recursively_make_hashable(v)) for k, v in dict_items(obj)] + ) return obj class ReQLEncoder(json.JSONEncoder): - ''' + """ Default JSONEncoder subclass to handle query conversion. - ''' + """ def __init__(self): json.JSONEncoder.__init__( - self, ensure_ascii=False, allow_nan=False, check_circular=False, separators=(',', ':') + self, + ensure_ascii=False, + allow_nan=False, + check_circular=False, + separators=(",", ":"), ) def default(self, obj): @@ -760,70 +778,86 @@ def default(self, obj): class ReQLDecoder(json.JSONDecoder): - ''' + """ Default JSONDecoder subclass to handle pseudo-type conversion. 
- ''' + """ def __init__(self, reql_format_opts=None): json.JSONDecoder.__init__(self, object_hook=self.convert_pseudotype) self.reql_format_opts = reql_format_opts or {} def convert_time(self, obj): - if 'epoch_time' not in obj: - raise ReqlDriverError(('pseudo-type TIME object %s does not ' + - 'have expected field "epoch_time".') - % json.dumps(obj)) - - if 'timezone' in obj: - return datetime.datetime.fromtimestamp(obj['epoch_time'], - RqlTzinfo(obj['timezone'])) + if "epoch_time" not in obj: + raise ReqlDriverError( + ( + "pseudo-type TIME object %s does not " + + 'have expected field "epoch_time".' + ) + % json.dumps(obj) + ) + + if "timezone" in obj: + return datetime.datetime.fromtimestamp( + obj["epoch_time"], RqlTzinfo(obj["timezone"]) + ) else: - return datetime.datetime.utcfromtimestamp(obj['epoch_time']) + return datetime.datetime.utcfromtimestamp(obj["epoch_time"]) @staticmethod def convert_grouped_data(obj): - if 'data' not in obj: - raise ReqlDriverError(('pseudo-type GROUPED_DATA object' + - ' %s does not have the expected field "data".') - % json.dumps(obj)) - return dict([(recursively_make_hashable(k), v) for k, v in obj['data']]) + if "data" not in obj: + raise ReqlDriverError( + ( + "pseudo-type GROUPED_DATA object" + + ' %s does not have the expected field "data".' + ) + % json.dumps(obj) + ) + return dict([(recursively_make_hashable(k), v) for k, v in obj["data"]]) @staticmethod def convert_binary(obj): - if 'data' not in obj: - raise ReqlDriverError(('pseudo-type BINARY object %s does not have ' + - 'the expected field "data".') - % json.dumps(obj)) - return RqlBinary(base64.b64decode(obj['data'].encode('utf-8'))) + if "data" not in obj: + raise ReqlDriverError( + ( + "pseudo-type BINARY object %s does not have " + + 'the expected field "data".' + ) + % json.dumps(obj) + ) + return RqlBinary(base64.b64decode(obj["data"].encode("utf-8"))) def convert_pseudotype(self, obj): - reql_type = obj.get('$reql_type$') + reql_type = obj.get("$reql_type$") if reql_type is not None: - if reql_type == 'TIME': - time_format = self.reql_format_opts.get('time_format') - if time_format is None or time_format == 'native': + if reql_type == "TIME": + time_format = self.reql_format_opts.get("time_format") + if time_format is None or time_format == "native": # Convert to native python datetime object return self.convert_time(obj) - elif time_format != 'raw': - raise ReqlDriverError("Unknown time_format run option \"%s\"." - % time_format) - elif reql_type == 'GROUPED_DATA': - group_format = self.reql_format_opts.get('group_format') - if group_format is None or group_format == 'native': + elif time_format != "raw": + raise ReqlDriverError( + 'Unknown time_format run option "%s".' % time_format + ) + elif reql_type == "GROUPED_DATA": + group_format = self.reql_format_opts.get("group_format") + if group_format is None or group_format == "native": return self.convert_grouped_data(obj) - elif group_format != 'raw': - raise ReqlDriverError("Unknown group_format run option \"%s\"." - % group_format) - elif reql_type == 'GEOMETRY': + elif group_format != "raw": + raise ReqlDriverError( + 'Unknown group_format run option "%s".' % group_format + ) + elif reql_type == "GEOMETRY": # No special support for this. 
Just return the raw object return obj - elif reql_type == 'BINARY': - binary_format = self.reql_format_opts.get('binary_format') - if binary_format is None or binary_format == 'native': + elif reql_type == "BINARY": + binary_format = self.reql_format_opts.get("binary_format") + if binary_format is None or binary_format == "native": return self.convert_binary(obj) - elif binary_format != 'raw': - raise ReqlDriverError("Unknown binary_format run option \"%s\"." - % binary_format) + elif binary_format != "raw": + raise ReqlDriverError( + 'Unknown binary_format run option "%s".' % binary_format + ) else: raise ReqlDriverError("Unknown pseudo-type %s" % reql_type) # If there was no pseudotype, or the relevant format is raw, return @@ -854,7 +888,7 @@ class MakeArray(RqlQuery): term_type = P_TERM.MAKE_ARRAY def compose(self, args, optargs): - return T('[', T(*args, intsp=', '), ']') + return T("[", T(*args, intsp=", "), "]") class MakeObj(RqlQuery): @@ -875,16 +909,21 @@ def build(self): return self.optargs def compose(self, args, optargs): - return T('r.expr({', T(*[T(repr(key), ': ', value) - for key, value in dict_items(optargs)], - intsp=', '), '})') + return T( + "r.expr({", + T( + *[T(repr(key), ": ", value) for key, value in dict_items(optargs)], + intsp=", " + ), + "})", + ) class Var(RqlQuery): term_type = P_TERM.VAR def compose(self, args, optargs): - return 'var_' + args[0] + return "var_" + args[0] class JavaScript(RqlTopLevelQuery): @@ -924,7 +963,7 @@ def __call__(self, *args, **kwargs): raise TypeError("'r.row' is not callable, use 'r.row[...]' instead") def compose(self, args, optargs): - return 'r.row' + return "r.row" class Eq(RqlBiCompareOperQuery): @@ -962,8 +1001,8 @@ class Not(RqlQuery): def compose(self, args, optargs): if isinstance(self._args[0], Datum): - args[0] = T('r.expr(', args[0], ')') - return T('(~', args[0], ')') + args[0] = T("r.expr(", args[0], ")") + return T("(~", args[0], ")") class Add(RqlBiOperQuery): @@ -1023,17 +1062,17 @@ class BitSar(RqlBoolOperQuery): class Floor(RqlMethodQuery): term_type = P_TERM.FLOOR - statement = 'floor' + statement = "floor" class Ceil(RqlMethodQuery): term_type = P_TERM.CEIL - statement = 'ceil' + statement = "ceil" class Round(RqlMethodQuery): term_type = P_TERM.ROUND - statement = 'round' + statement = "round" class Append(RqlMethodQuery): @@ -1073,91 +1112,91 @@ class SetDifference(RqlMethodQuery): class Slice(RqlBracketQuery): term_type = P_TERM.SLICE - statement = 'slice' + statement = "slice" # Slice has a special bracket syntax, implemented here def compose(self, args, optargs): if self.bracket_operator: if needs_wrap(self._args[0]): - args[0] = T('r.expr(', args[0], ')') - return T(args[0], '[', args[1], ':', args[2], ']') + args[0] = T("r.expr(", args[0], ")") + return T(args[0], "[", args[1], ":", args[2], "]") else: return RqlBracketQuery.compose(self, args, optargs) class Skip(RqlMethodQuery): term_type = P_TERM.SKIP - statement = 'skip' + statement = "skip" class Limit(RqlMethodQuery): term_type = P_TERM.LIMIT - statement = 'limit' + statement = "limit" class GetField(RqlBracketQuery): term_type = P_TERM.GET_FIELD - statement = 'get_field' + statement = "get_field" class Bracket(RqlBracketQuery): term_type = P_TERM.BRACKET - statement = 'bracket' + statement = "bracket" class Contains(RqlMethodQuery): term_type = P_TERM.CONTAINS - statement = 'contains' + statement = "contains" class HasFields(RqlMethodQuery): term_type = P_TERM.HAS_FIELDS - statement = 'has_fields' + statement = "has_fields" class 
WithFields(RqlMethodQuery): term_type = P_TERM.WITH_FIELDS - statement = 'with_fields' + statement = "with_fields" class Keys(RqlMethodQuery): term_type = P_TERM.KEYS - statement = 'keys' + statement = "keys" class Values(RqlMethodQuery): term_type = P_TERM.VALUES - statement = 'values' + statement = "values" class Object(RqlMethodQuery): term_type = P_TERM.OBJECT - statement = 'object' + statement = "object" class Pluck(RqlMethodQuery): term_type = P_TERM.PLUCK - statement = 'pluck' + statement = "pluck" class Without(RqlMethodQuery): term_type = P_TERM.WITHOUT - statement = 'without' + statement = "without" class Merge(RqlMethodQuery): term_type = P_TERM.MERGE - statement = 'merge' + statement = "merge" class Between(RqlMethodQuery): term_type = P_TERM.BETWEEN - statement = 'between' + statement = "between" class DB(RqlTopLevelQuery): term_type = P_TERM.DB - statement = 'db' + statement = "db" def table_list(self, *args): return TableList(self, *args) @@ -1203,18 +1242,17 @@ def __init__(self, *args): def compose(self, args, optargs): if len(args) != 2: - return T('r.do(', T(T(*(args[1:]), intsp=', '), args[0], - intsp=', '), ')') + return T("r.do(", T(T(*(args[1:]), intsp=", "), args[0], intsp=", "), ")") if isinstance(self._args[1], Datum): - args[1] = T('r.expr(', args[1], ')') + args[1] = T("r.expr(", args[1], ")") - return T(args[1], '.do(', args[0], ')') + return T(args[1], ".do(", args[0], ")") class Table(RqlQuery): term_type = P_TERM.TABLE - statement = 'table' + statement = "table" def insert(self, *args, **kwargs): return Insert(self, *[expr(arg) for arg in args], **kwargs) @@ -1282,201 +1320,201 @@ def uuid(self, *args, **kwargs): return UUID(self, *args, **kwargs) def compose(self, args, optargs): - args.extend([T(k, '=', v) for k, v in dict_items(optargs)]) + args.extend([T(k, "=", v) for k, v in dict_items(optargs)]) if isinstance(self._args[0], DB): - return T(args[0], '.table(', T(*(args[1:]), intsp=', '), ')') + return T(args[0], ".table(", T(*(args[1:]), intsp=", "), ")") else: - return T('r.table(', T(*(args), intsp=', '), ')') + return T("r.table(", T(*(args), intsp=", "), ")") class Get(RqlMethodQuery): term_type = P_TERM.GET - statement = 'get' + statement = "get" class GetAll(RqlMethodQuery): term_type = P_TERM.GET_ALL - statement = 'get_all' + statement = "get_all" class GetIntersecting(RqlMethodQuery): term_type = P_TERM.GET_INTERSECTING - statement = 'get_intersecting' + statement = "get_intersecting" class GetNearest(RqlMethodQuery): term_type = P_TERM.GET_NEAREST - statement = 'get_nearest' + statement = "get_nearest" class UUID(RqlMethodQuery): term_type = P_TERM.UUID - statement = 'uuid' + statement = "uuid" class Reduce(RqlMethodQuery): term_type = P_TERM.REDUCE - statement = 'reduce' + statement = "reduce" class Sum(RqlMethodQuery): term_type = P_TERM.SUM - statement = 'sum' + statement = "sum" class Avg(RqlMethodQuery): term_type = P_TERM.AVG - statement = 'avg' + statement = "avg" class Min(RqlMethodQuery): term_type = P_TERM.MIN - statement = 'min' + statement = "min" class Max(RqlMethodQuery): term_type = P_TERM.MAX - statement = 'max' + statement = "max" class Map(RqlMethodQuery): term_type = P_TERM.MAP - statement = 'map' + statement = "map" class Fold(RqlMethodQuery): term_type = P_TERM.FOLD - statement = 'fold' + statement = "fold" class Filter(RqlMethodQuery): term_type = P_TERM.FILTER - statement = 'filter' + statement = "filter" class ConcatMap(RqlMethodQuery): term_type = P_TERM.CONCAT_MAP - statement = 'concat_map' + statement = "concat_map" 
class OrderBy(RqlMethodQuery): term_type = P_TERM.ORDER_BY - statement = 'order_by' + statement = "order_by" class Distinct(RqlMethodQuery): term_type = P_TERM.DISTINCT - statement = 'distinct' + statement = "distinct" class Count(RqlMethodQuery): term_type = P_TERM.COUNT - statement = 'count' + statement = "count" class Union(RqlMethodQuery): term_type = P_TERM.UNION - statement = 'union' + statement = "union" class Nth(RqlBracketQuery): term_type = P_TERM.NTH - statement = 'nth' + statement = "nth" class Match(RqlMethodQuery): term_type = P_TERM.MATCH - statement = 'match' + statement = "match" class ToJsonString(RqlMethodQuery): term_type = P_TERM.TO_JSON_STRING - statement = 'to_json_string' + statement = "to_json_string" class Split(RqlMethodQuery): term_type = P_TERM.SPLIT - statement = 'split' + statement = "split" class Upcase(RqlMethodQuery): term_type = P_TERM.UPCASE - statement = 'upcase' + statement = "upcase" class Downcase(RqlMethodQuery): term_type = P_TERM.DOWNCASE - statement = 'downcase' + statement = "downcase" class OffsetsOf(RqlMethodQuery): term_type = P_TERM.OFFSETS_OF - statement = 'offsets_of' + statement = "offsets_of" class IsEmpty(RqlMethodQuery): term_type = P_TERM.IS_EMPTY - statement = 'is_empty' + statement = "is_empty" class Group(RqlMethodQuery): term_type = P_TERM.GROUP - statement = 'group' + statement = "group" class InnerJoin(RqlMethodQuery): term_type = P_TERM.INNER_JOIN - statement = 'inner_join' + statement = "inner_join" class OuterJoin(RqlMethodQuery): term_type = P_TERM.OUTER_JOIN - statement = 'outer_join' + statement = "outer_join" class EqJoin(RqlMethodQuery): term_type = P_TERM.EQ_JOIN - statement = 'eq_join' + statement = "eq_join" class Zip(RqlMethodQuery): term_type = P_TERM.ZIP - statement = 'zip' + statement = "zip" class CoerceTo(RqlMethodQuery): term_type = P_TERM.COERCE_TO - statement = 'coerce_to' + statement = "coerce_to" class Ungroup(RqlMethodQuery): term_type = P_TERM.UNGROUP - statement = 'ungroup' + statement = "ungroup" class TypeOf(RqlMethodQuery): term_type = P_TERM.TYPE_OF - statement = 'type_of' + statement = "type_of" class Update(RqlMethodQuery): term_type = P_TERM.UPDATE - statement = 'update' + statement = "update" class Delete(RqlMethodQuery): term_type = P_TERM.DELETE - statement = 'delete' + statement = "delete" class Replace(RqlMethodQuery): term_type = P_TERM.REPLACE - statement = 'replace' + statement = "replace" class Insert(RqlMethodQuery): term_type = P_TERM.INSERT - statement = 'insert' + statement = "insert" class DbCreate(RqlTopLevelQuery): @@ -1526,42 +1564,42 @@ class TableListTL(RqlTopLevelQuery): class SetWriteHook(RqlMethodQuery): term_type = P_TERM.SET_WRITE_HOOK - statement = 'set_write_hook' + statement = "set_write_hook" class GetWriteHook(RqlMethodQuery): term_type = P_TERM.GET_WRITE_HOOK - statement = 'get_write_hook' + statement = "get_write_hook" class IndexCreate(RqlMethodQuery): term_type = P_TERM.INDEX_CREATE - statement = 'index_create' + statement = "index_create" class IndexDrop(RqlMethodQuery): term_type = P_TERM.INDEX_DROP - statement = 'index_drop' + statement = "index_drop" class IndexRename(RqlMethodQuery): term_type = P_TERM.INDEX_RENAME - statement = 'index_rename' + statement = "index_rename" class IndexList(RqlMethodQuery): term_type = P_TERM.INDEX_LIST - statement = 'index_list' + statement = "index_list" class IndexStatus(RqlMethodQuery): term_type = P_TERM.INDEX_STATUS - statement = 'index_status' + statement = "index_status" class IndexWait(RqlMethodQuery): term_type = 
P_TERM.INDEX_WAIT - statement = 'index_wait' + statement = "index_wait" class Config(RqlMethodQuery): @@ -1581,27 +1619,27 @@ class Wait(RqlMethodQuery): class Reconfigure(RqlMethodQuery): term_type = P_TERM.RECONFIGURE - statement = 'reconfigure' + statement = "reconfigure" class Rebalance(RqlMethodQuery): term_type = P_TERM.REBALANCE - statement = 'rebalance' + statement = "rebalance" class Sync(RqlMethodQuery): term_type = P_TERM.SYNC - statement = 'sync' + statement = "sync" class Grant(RqlMethodQuery): term_type = P_TERM.GRANT - statement = 'grant' + statement = "grant" class GrantTL(RqlTopLevelQuery): term_type = P_TERM.GRANT - statement = 'grant' + statement = "grant" class Branch(RqlTopLevelQuery): @@ -1623,47 +1661,47 @@ class And(RqlBoolOperQuery): class ForEach(RqlMethodQuery): term_type = P_TERM.FOR_EACH - statement = 'for_each' + statement = "for_each" class Info(RqlMethodQuery): term_type = P_TERM.INFO - statement = 'info' + statement = "info" class InsertAt(RqlMethodQuery): term_type = P_TERM.INSERT_AT - statement = 'insert_at' + statement = "insert_at" class SpliceAt(RqlMethodQuery): term_type = P_TERM.SPLICE_AT - statement = 'splice_at' + statement = "splice_at" class DeleteAt(RqlMethodQuery): term_type = P_TERM.DELETE_AT - statement = 'delete_at' + statement = "delete_at" class ChangeAt(RqlMethodQuery): term_type = P_TERM.CHANGE_AT - statement = 'change_at' + statement = "change_at" class Sample(RqlMethodQuery): term_type = P_TERM.SAMPLE - statement = 'sample' + statement = "sample" class Json(RqlTopLevelQuery): term_type = P_TERM.JSON - statement = 'json' + statement = "json" class Args(RqlTopLevelQuery): term_type = P_TERM.ARGS - statement = 'args' + statement = "args" # Use this class as a wrapper to 'bytes' so we can tell the difference @@ -1673,20 +1711,25 @@ def __new__(cls, *args, **kwargs): return bytes.__new__(cls, *args, **kwargs) def __repr__(self): - excerpt = binascii.hexlify(self[0:6]).decode('utf-8') - excerpt = ' '.join([excerpt[i:i + 2] - for i in xrange(0, len(excerpt), 2)]) - excerpt = ', \'%s%s\'' % (excerpt, '...' if len(self) > 6 else '') \ - if len(self) > 0 else '' - return "" % (len(self), 's' - if len(self) != 1 else '', excerpt) + excerpt = binascii.hexlify(self[0:6]).decode("utf-8") + excerpt = " ".join([excerpt[i : i + 2] for i in xrange(0, len(excerpt), 2)]) + excerpt = ( + ", '%s%s'" % (excerpt, "..." if len(self) > 6 else "") + if len(self) > 0 + else "" + ) + return "" % ( + len(self), + "s" if len(self) != 1 else "", + excerpt, + ) class Binary(RqlTopLevelQuery): # Note: this term isn't actually serialized, it should exist only # in the client term_type = P_TERM.BINARY - statement = 'binary' + statement = "binary" def __init__(self, data): # We only allow 'bytes' objects to be serialized as binary @@ -1695,13 +1738,19 @@ def __init__(self, data): if isinstance(data, RqlQuery): RqlTopLevelQuery.__init__(self, data) elif isinstance(data, unicode): - raise ReqlDriverCompileError("Cannot convert a unicode string to binary, " - "use `unicode.encode()` to specify the " - "encoding.") + raise ReqlDriverCompileError( + "Cannot convert a unicode string to binary, " + "use `unicode.encode()` to specify the " + "encoding." + ) elif not isinstance(data, bytes): - raise ReqlDriverCompileError(("Cannot convert %s to binary, convert the " - "object to a `bytes` object first.") - % type(data).__name__) + raise ReqlDriverCompileError( + ( + "Cannot convert %s to binary, convert the " + "object to a `bytes` object first." 
+ ) + % type(data).__name__ + ) else: self.base64_data = base64.b64encode(data) @@ -1711,171 +1760,170 @@ def __init__(self, data): def compose(self, args, optargs): if len(self._args) == 0: - return T('r.', self.statement, '(bytes())') + return T("r.", self.statement, "(bytes())") else: return RqlTopLevelQuery.compose(self, args, optargs) def build(self): if len(self._args) == 0: - return {'$reql_type$': 'BINARY', - 'data': self.base64_data.decode('utf-8')} + return {"$reql_type$": "BINARY", "data": self.base64_data.decode("utf-8")} else: return RqlTopLevelQuery.build(self) class Range(RqlTopLevelQuery): term_type = P_TERM.RANGE - statement = 'range' + statement = "range" class ToISO8601(RqlMethodQuery): term_type = P_TERM.TO_ISO8601 - statement = 'to_iso8601' + statement = "to_iso8601" class During(RqlMethodQuery): term_type = P_TERM.DURING - statement = 'during' + statement = "during" class Date(RqlMethodQuery): term_type = P_TERM.DATE - statement = 'date' + statement = "date" class TimeOfDay(RqlMethodQuery): term_type = P_TERM.TIME_OF_DAY - statement = 'time_of_day' + statement = "time_of_day" class Timezone(RqlMethodQuery): term_type = P_TERM.TIMEZONE - statement = 'timezone' + statement = "timezone" class Year(RqlMethodQuery): term_type = P_TERM.YEAR - statement = 'year' + statement = "year" class Month(RqlMethodQuery): term_type = P_TERM.MONTH - statement = 'month' + statement = "month" class Day(RqlMethodQuery): term_type = P_TERM.DAY - statement = 'day' + statement = "day" class DayOfWeek(RqlMethodQuery): term_type = P_TERM.DAY_OF_WEEK - statement = 'day_of_week' + statement = "day_of_week" class DayOfYear(RqlMethodQuery): term_type = P_TERM.DAY_OF_YEAR - statement = 'day_of_year' + statement = "day_of_year" class Hours(RqlMethodQuery): term_type = P_TERM.HOURS - statement = 'hours' + statement = "hours" class Minutes(RqlMethodQuery): term_type = P_TERM.MINUTES - statement = 'minutes' + statement = "minutes" class Seconds(RqlMethodQuery): term_type = P_TERM.SECONDS - statement = 'seconds' + statement = "seconds" class Time(RqlTopLevelQuery): term_type = P_TERM.TIME - statement = 'time' + statement = "time" class ISO8601(RqlTopLevelQuery): term_type = P_TERM.ISO8601 - statement = 'iso8601' + statement = "iso8601" class EpochTime(RqlTopLevelQuery): term_type = P_TERM.EPOCH_TIME - statement = 'epoch_time' + statement = "epoch_time" class Now(RqlTopLevelQuery): term_type = P_TERM.NOW - statement = 'now' + statement = "now" class InTimezone(RqlMethodQuery): term_type = P_TERM.IN_TIMEZONE - statement = 'in_timezone' + statement = "in_timezone" class ToEpochTime(RqlMethodQuery): term_type = P_TERM.TO_EPOCH_TIME - statement = 'to_epoch_time' + statement = "to_epoch_time" class GeoJson(RqlTopLevelQuery): term_type = P_TERM.GEOJSON - statement = 'geojson' + statement = "geojson" class ToGeoJson(RqlMethodQuery): term_type = P_TERM.TO_GEOJSON - statement = 'to_geojson' + statement = "to_geojson" class Point(RqlTopLevelQuery): term_type = P_TERM.POINT - statement = 'point' + statement = "point" class Line(RqlTopLevelQuery): term_type = P_TERM.LINE - statement = 'line' + statement = "line" class Polygon(RqlTopLevelQuery): term_type = P_TERM.POLYGON - statement = 'polygon' + statement = "polygon" class Distance(RqlMethodQuery): term_type = P_TERM.DISTANCE - statement = 'distance' + statement = "distance" class Intersects(RqlMethodQuery): term_type = P_TERM.INTERSECTS - statement = 'intersects' + statement = "intersects" class Includes(RqlMethodQuery): term_type = P_TERM.INCLUDES - statement = 
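The `Binary` checks above only accept `bytes`; text must be encoded first, otherwise the driver raises the `ReqlDriverCompileError` shown in this hunk. A small client-side sketch (no server is needed just to build the term):

    from rethinkdb import RethinkDB
    from rethinkdb.errors import ReqlDriverCompileError

    r = RethinkDB()

    term = r.binary(u"héllo".encode("utf-8"))  # bytes are accepted

    try:
        r.binary(u"héllo")  # a text (unicode) string is rejected
    except ReqlDriverCompileError as err:
        print(err)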
'includes' + statement = "includes" class Circle(RqlTopLevelQuery): term_type = P_TERM.CIRCLE - statement = 'circle' + statement = "circle" class Fill(RqlMethodQuery): term_type = P_TERM.FILL - statement = 'fill' + statement = "fill" class PolygonSub(RqlMethodQuery): term_type = P_TERM.POLYGON_SUB - statement = 'polygon_sub' + statement = "polygon_sub" # Returns True if IMPLICIT_VAR is found in the subquery @@ -1924,21 +1972,27 @@ def __init__(self, lmbd): self._args.extend([MakeArray(*vrids), expr(lmbd(*vrs))]) def compose(self, args, optargs): - return T('lambda ', T(*[v.compose([v._args[0].compose(None, None)], - []) for v in self.vrs], - intsp=', '), ': ', args[1]) + return T( + "lambda ", + T( + *[v.compose([v._args[0].compose(None, None)], []) for v in self.vrs], + intsp=", " + ), + ": ", + args[1], + ) class Asc(RqlTopLevelQuery): term_type = P_TERM.ASC - statement = 'asc' + statement = "asc" class Desc(RqlTopLevelQuery): term_type = P_TERM.DESC - statement = 'desc' + statement = "desc" class Literal(RqlTopLevelQuery): term_type = P_TERM.LITERAL - statement = 'literal' + statement = "literal" diff --git a/rethinkdb/asyncio_net/net_asyncio.py b/rethinkdb/asyncio_net/net_asyncio.py index 3c3b2beb..781081e5 100644 --- a/rethinkdb/asyncio_net/net_asyncio.py +++ b/rethinkdb/asyncio_net/net_asyncio.py @@ -22,11 +22,17 @@ import struct from rethinkdb import ql2_pb2 -from rethinkdb.errors import ReqlAuthError, ReqlCursorEmpty, ReqlDriverError, ReqlTimeoutError, RqlCursorEmpty -from rethinkdb.net import Connection as ConnectionBase, Cursor, Query, Response, maybe_profile +from rethinkdb.errors import ( + ReqlAuthError, + ReqlCursorEmpty, + ReqlDriverError, + ReqlTimeoutError, + RqlCursorEmpty, +) +from rethinkdb.net import Connection as ConnectionBase +from rethinkdb.net import Cursor, Query, Response, maybe_profile - -__all__ = ['Connection'] +__all__ = ["Connection"] pResponse = ql2_pb2.Response.ResponseType @@ -39,8 +45,8 @@ def _read_until(streamreader, delimiter): buffer = bytearray() while True: - c = (yield from streamreader.read(1)) - if c == b'': + c = yield from streamreader.read(1) + if c == b"": break # EOF buffer.append(c[0]) if c == delimiter: @@ -148,9 +154,11 @@ def _get_next(self, timeout): return self.items.popleft() def _maybe_fetch_batch(self): - if self.error is None and \ - len(self.items) < self.threshold and \ - self.outstanding_requests == 0: + if ( + self.error is None + and len(self.items) < self.threshold + and self.outstanding_requests == 0 + ): self.outstanding_requests += 1 asyncio.ensure_future(self.conn._parent._continue(self)) @@ -172,11 +180,11 @@ def __init__(self, parent, io_loop=None): def client_port(self): if self.is_open(): - return self._streamwriter.get_extra_info('sockname')[1] + return self._streamwriter.get_extra_info("sockname")[1] def client_address(self): if self.is_open(): - return self._streamwriter.get_extra_info('sockname')[0] + return self._streamwriter.get_extra_info("sockname")[0] @asyncio.coroutine def connect(self, timeout): @@ -192,13 +200,22 @@ def connect(self, timeout): ssl_context.load_verify_locations(self._parent.ssl["ca_certs"]) self._streamreader, self._streamwriter = yield from asyncio.open_connection( - self._parent.host, self._parent.port, loop=self._io_loop, ssl=ssl_context + self._parent.host, + self._parent.port, + loop=self._io_loop, + ssl=ssl_context, + ) + self._streamwriter.get_extra_info("socket").setsockopt( + socket.IPPROTO_TCP, socket.TCP_NODELAY, 1 + ) + self._streamwriter.get_extra_info("socket").setsockopt( + 
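The geometry terms above (`point`, `circle`, `distance`, `intersects`, and so on) back the driver's geospatial helpers. A rough usage sketch; the coordinates and the server at localhost:28015 are illustrative assumptions:

    from rethinkdb import RethinkDB

    r = RethinkDB()
    conn = r.connect(db="test")

    sf = r.point(-122.423246, 37.779388)   # longitude, latitude
    sd = r.point(-117.220406, 32.719464)

    print(r.distance(sf, sd, unit="km").run(conn))
    print(r.circle(sf, 500, unit="km").intersects(sd).run(conn))

    conn.close()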
socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1 ) - self._streamwriter.get_extra_info('socket').setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - self._streamwriter.get_extra_info('socket').setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) except Exception as err: - raise ReqlDriverError('Could not connect to %s:%s. Error: %s' % - (self._parent.host, self._parent.port, str(err))) + raise ReqlDriverError( + "Could not connect to %s:%s. Error: %s" + % (self._parent.host, self._parent.port, str(err)) + ) try: self._parent.handshake.reset() @@ -214,8 +231,9 @@ def connect(self, timeout): self._streamwriter.write(request) response = yield from asyncio.wait_for( - _read_until(self._streamreader, b'\0'), - timeout, loop=self._io_loop, + _read_until(self._streamreader, b"\0"), + timeout, + loop=self._io_loop, ) response = response[:-1] except ReqlAuthError: @@ -224,14 +242,15 @@ def connect(self, timeout): except ReqlTimeoutError as err: yield from self.close() raise ReqlDriverError( - 'Connection interrupted during handshake with %s:%s. Error: %s' % ( - self._parent.host, self._parent.port, str(err) - ) + "Connection interrupted during handshake with %s:%s. Error: %s" + % (self._parent.host, self._parent.port, str(err)) ) except Exception as err: yield from self.close() - raise ReqlDriverError('Could not connect to %s:%s. Error: %s' % - (self._parent.host, self._parent.port, str(err))) + raise ReqlDriverError( + "Could not connect to %s:%s. Error: %s" + % (self._parent.host, self._parent.port, str(err)) + ) # Start a parallel function to perform reads # store a reference to it so it doesn't get destroyed @@ -302,12 +321,13 @@ def _reader(self): # Do not pop the query from the dict until later, so # we don't lose track of it in case of an exception query, future = self._user_queries[token] - res = Response(token, buf, - self._parent._get_json_decoder(query)) + res = Response(token, buf, self._parent._get_json_decoder(query)) if res.type == pResponse.SUCCESS_ATOM: future.set_result(maybe_profile(res.data[0], res)) - elif res.type in (pResponse.SUCCESS_SEQUENCE, - pResponse.SUCCESS_PARTIAL): + elif res.type in ( + pResponse.SUCCESS_SEQUENCE, + pResponse.SUCCESS_PARTIAL, + ): cursor = AsyncioCursor(self, query, res) future.set_result(maybe_profile(cursor, res)) elif res.type == pResponse.WAIT_COMPLETE: @@ -330,7 +350,9 @@ def __init__(self, *args, **kwargs): try: self.port = int(self.port) except ValueError: - raise ReqlDriverError("Could not convert port %s to an integer." % self.port) + raise ReqlDriverError( + "Could not convert port %s to an integer." 
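The asyncio connection implementation above is the one selected by `r.set_loop_type("asyncio")`. A hedged usage sketch, assuming a server on localhost:28015 and a `users` table, and using the `fetch_next` cursor pattern from the driver documentation:

    import asyncio

    from rethinkdb import RethinkDB

    r = RethinkDB()
    r.set_loop_type("asyncio")  # loads rethinkdb/asyncio_net/net_asyncio.py

    async def dump_users():
        conn = await r.connect(host="localhost", port=28015, db="test")
        cursor = await r.table("users").run(conn)
        while await cursor.fetch_next():
            print(await cursor.next())
        await conn.close()

    asyncio.get_event_loop().run_until_complete(dump_users())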
% self.port + ) @asyncio.coroutine def __aenter__(self): diff --git a/rethinkdb/backports/__init__.py b/rethinkdb/backports/__init__.py index 612d3283..1b7ab3f5 100644 --- a/rethinkdb/backports/__init__.py +++ b/rethinkdb/backports/__init__.py @@ -1,3 +1,4 @@ # This is a Python "namespace package" http://www.python.org/dev/peps/pep-0382/ from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/rethinkdb/backports/ssl_match_hostname/__init__.py b/rethinkdb/backports/ssl_match_hostname/__init__.py index 45f17811..1959a224 100644 --- a/rethinkdb/backports/ssl_match_hostname/__init__.py +++ b/rethinkdb/backports/ssl_match_hostname/__init__.py @@ -20,7 +20,7 @@ import re -__version__ = '3.4.0.2' +__version__ = "3.4.0.2" class CertificateError(ValueError): @@ -38,18 +38,19 @@ def _dnsname_match(domain_name, hostname, max_wildcards=1): # Ported from python3-syntax: # leftmost, *remainder = domain_name.split(r'.') - parts = domain_name.split(r'.') + parts = domain_name.split(r".") leftmost = parts[0] remainder = parts[1:] - wildcards = leftmost.count('*') + wildcards = leftmost.count("*") if wildcards > max_wildcards: # Issue #17980: avoid denials of service by refusing more # than one wildcard per fragment. A survey of established # policy among SSL implementations showed it to be a # reasonable choice. raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(domain_name)) + "too many wildcards in certificate DNS name: " + repr(domain_name) + ) # speed up common case w/o wildcards if not wildcards: @@ -58,11 +59,11 @@ def _dnsname_match(domain_name, hostname, max_wildcards=1): # RFC 6125, section 6.4.3, subitem 1. # The client SHOULD NOT attempt to match a presented identifier in which # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': + if leftmost == "*": # When '*' is a fragment by itself, it matches a non-empty dotless # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + pats.append("[^.]+") + elif leftmost.startswith("xn--") or hostname.startswith("xn--"): # RFC 6125, section 6.4.3, subitem 3. # The client SHOULD NOT attempt to match a presented identifier # where the wildcard character is embedded within an A-label or @@ -70,13 +71,13 @@ def _dnsname_match(domain_name, hostname, max_wildcards=1): pats.append(re.escape(leftmost)) else: # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + pats.append(re.escape(leftmost).replace(r"\*", "[^.]*")) # add the remaining fragments, ignore any wildcards for frag in remainder: pats.append(re.escape(frag)) - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE) return pat.match(hostname) @@ -93,9 +94,9 @@ def match_hostname(cert, hostname): raise ValueError("empty or no certificate") dnsnames = [] - san = cert.get('subjectAltName', ()) + san = cert.get("subjectAltName", ()) for key, value in san: - if key == 'DNS': + if key == "DNS": if _dnsname_match(value, hostname): return dnsnames.append(value) @@ -103,18 +104,23 @@ def match_hostname(cert, hostname): if not dnsnames: # The subject is only checked when there is no dNSName entry # in subjectAltName - for sub in cert.get('subject', ()): + for sub in cert.get("subject", ()): for key, value in sub: # XXX according to RFC 2818, the most specific Common Name # must be used. 
- if key == 'commonName': + if key == "commonName": if _dnsname_match(value, hostname): return dnsnames.append(value) if len(dnsnames) > 1: - raise CertificateError("hostname %r doesn't match either of %s" % (hostname, ', '.join(map(repr, dnsnames)))) + raise CertificateError( + "hostname %r doesn't match either of %s" + % (hostname, ", ".join(map(repr, dnsnames))) + ) elif len(dnsnames) == 1: raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0])) else: - raise CertificateError("no appropriate commonName or subjectAltName fields were found") + raise CertificateError( + "no appropriate commonName or subjectAltName fields were found" + ) diff --git a/rethinkdb/docs.py b/rethinkdb/docs.py index c606b5b6..cd39de2e 100644 --- a/rethinkdb/docs.py +++ b/rethinkdb/docs.py @@ -5,190 +5,723 @@ import rethinkdb docsSource = [ - - (rethinkdb.net.Connection.close, b'conn.close(noreply_wait=True)\n\nClose an open connection.\n\nClosing a connection normally waits until all outstanding requests have finished and then frees any open resources associated with the connection. By passing `False` to the `noreply_wait` optional argument, the connection will be closed immediately, possibly aborting any outstanding noreply writes.\n\nA noreply query is executed by passing the `noreply` option to the [run](http://rethinkdb.com/api/python/run/) command, indicating that `run()` should not wait for the query to complete before returning. You may also explicitly wait for a noreply query to complete by using the [noreply_wait](http://rethinkdb.com/api/python/noreply_wait) command.\n\n*Example* Close an open connection, waiting for noreply writes to finish.\n\n conn.close()\n\n*Example* Close an open connection immediately.\n\n conn.close(noreply_wait=False)\n'), - (rethinkdb.connect, b'r.connect(host="localhost", port=28015, db="test", auth_key="", timeout=20) -> connection\nr.connect(host) -> connection\n\nCreate a new connection to the database server. The keyword arguments are:\n\n- `host`: host of the RethinkDB instance. The default value is `localhost`.\n- `port`: the driver port, by default `28015`.\n- `db`: the database used if not explicitly specified in a query, by default `test`.\n- `user`: the user account to connect as (default `admin`).\n- `password`: the password for the user account to connect as (default `\'\'`, empty).\n- `timeout`: timeout period in seconds for the connection to be opened (default `20`).\n- `ssl`: a hash of options to support SSL connections (default `None`). Currently, there is only one option available, and if the `ssl` option is specified, this key is required:\n - `ca_certs`: a path to the SSL CA certificate.\n\nIf the connection cannot be established, a `ReqlDriverError` exception will be thrown.\n\n\n\nThe RethinkDB Python driver includes support for asynchronous connections using Tornado and Twisted. 
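The backported `match_hostname` above follows the RFC 6125 wildcard rules and raises `CertificateError` on a mismatch. It can be exercised directly with a hand-built certificate dict; the hostnames below are made up:

    from rethinkdb.backports.ssl_match_hostname import CertificateError, match_hostname

    cert = {"subjectAltName": (("DNS", "*.example.com"),)}

    match_hostname(cert, "db.example.com")  # returns None: the wildcard matches

    try:
        match_hostname(cert, "db.other.org")
    except CertificateError as err:
        print(err)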
Read the asynchronous connections documentation for more information.\n\n*Example* Open a connection using the default host and port, specifying the default database.\n\n conn = r.connect(db=\'marvel\')\n\n*Example* Open a new connection to the database.\n\n conn = r.connect(host=\'localhost\',\n port=28015,\n db=\'heroes\')\n\n*Example* Open a new connection to the database, specifying a user/password combination for authentication.\n\n conn = r.connect(host=\'localhost\',\n port=28015,\n db=\'heroes\',\n user=\'herofinder\',\n password=\'metropolis\')\n\n*Example* Open a new connection to the database using an SSL proxy.\n\n conn = r.connect(host=\'localhost\',\n port=28015,\n auth_key=\'hunter2\',\n ssl={\'ca_certs\': \'/path/to/ca.crt\'})\n\n*Example* Use a `with` statement to open a connection and pass it to a block. Using this style, the connection will be automatically closed when execution reaches the end of the block.\n\n with r.connect(db=\'marvel\') as conn:\n r.table(\'superheroes\').run(conn)\n'), - (rethinkdb.net.Connection.noreply_wait, b'conn.noreply_wait()\n\n`noreply_wait` ensures that previous queries with the `noreply` flag have been processed\nby the server. Note that this guarantee only applies to queries run on the given connection.\n\n*Example* We have previously run queries with the `noreply` argument set to `True`. Now\nwait until the server has processed them.\n\n conn.noreply_wait()\n\n'), - (rethinkdb, b'r -> r\n\nThe top-level ReQL namespace.\n\n*Example* Setup your top-level namespace.\n\n import rethinkdb as r\n\n'), - (rethinkdb.net.Connection.reconnect, b'conn.reconnect(noreply_wait=True)\n\nClose and reopen a connection.\n\nClosing a connection normally waits until all outstanding requests have finished and then frees any open resources associated with the connection. By passing `False` to the `noreply_wait` optional argument, the connection will be closed immediately, possibly aborting any outstanding noreply writes.\n\nA noreply query is executed by passing the `noreply` option to the [run](http://rethinkdb.com/api/python/run/) command, indicating that `run()` should not wait for the query to complete before returning. You may also explicitly wait for a noreply query to complete by using the [noreply_wait](http://rethinkdb.com/api/python/noreply_wait) command.\n\n*Example* Cancel outstanding requests/queries that are no longer needed.\n\n conn.reconnect(noreply_wait=False)\n'), - (rethinkdb.net.Connection.repl, b"conn.repl()\n\nSet the default connection to make REPL use easier. Allows calling\n`.run()` on queries without specifying a connection.\n\n__Note:__ Avoid using `repl` in application code. RethinkDB connection objects are not thread-safe, and calls to `connect` from multiple threads may change the global connection object used by `repl`. 
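Tying the `close` and `noreply_wait` entries above together, a short sketch of the fire-and-forget write pattern; the server address and the `logs` table are assumptions for illustration:

    from rethinkdb import RethinkDB

    r = RethinkDB()
    conn = r.connect(host="localhost", port=28015, db="test")

    for i in range(100):
        r.table("logs").insert({"seq": i}).run(conn, noreply=True)

    conn.noreply_wait()   # block until the noreply writes have been processed
    conn.close()          # close(noreply_wait=False) would skip that wait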
Applications should specify connections explicitly.\n\n*Example* Set the default connection for the REPL, then call\n`run()` without specifying the connection.\n\n r.connect(db='marvel').repl()\n r.table('heroes').run()\n"), - (rethinkdb.ast.RqlQuery.run, b'query.run(conn[, options]) -> cursor\nquery.run(conn[, options]) -> object\n\nRun a query on a connection, returning either a single JSON result or\na cursor, depending on the query.\n\nThe optional arguments are:\n\n- `read_mode`: One of three possible values affecting the consistency guarantee for the query (default: `\'single\'`).\n - `\'single\'` (the default) returns values that are in memory (but not necessarily written to disk) on the primary replica.\n - `\'majority\'` will only return values that are safely committed on disk on a majority of replicas. This requires sending a message to every replica on each read, so it is the slowest but most consistent.\n - `\'outdated\'` will return values that are in memory on an arbitrarily-selected replica. This is the fastest but least consistent.\n- `time_format`: what format to return times in (default: `\'native\'`).\n Set this to `\'raw\'` if you want times returned as JSON objects for exporting.\n- `profile`: whether or not to return a profile of the query\'s\n execution (default: `False`).\n- `durability`: possible values are `\'hard\'` and `\'soft\'`. In soft durability mode RethinkDB\nwill acknowledge the write immediately after receiving it, but before the write has\nbeen committed to disk.\n- `group_format`: what format to return `grouped_data` and `grouped_streams` in (default: `\'native\'`).\n Set this to `\'raw\'` if you want the raw pseudotype.\n- `noreply`: set to `True` to not receive the result object or cursor and return immediately.\n- `db`: the database to run this query against as a string. The default is the database specified in the `db` parameter to [connect](http://rethinkdb.com/api/python/connect/) (which defaults to `test`). The database may also be specified with the [db](http://rethinkdb.com/api/python/db/) command.\n- `array_limit`: the maximum numbers of array elements that can be returned by a query (default: 100,000). This affects all ReQL commands that return arrays. Note that it has no effect on the size of arrays being _written_ to the database; those always have an upper limit of 100,000 elements.\n- `binary_format`: what format to return binary data in (default: `\'native\'`). Set this to `\'raw\'` if you want the raw pseudotype.\n- `min_batch_rows`: minimum number of rows to wait for before batching a result set (default: 8). This is an integer.\n- `max_batch_rows`: maximum number of rows to wait for before batching a result set (default: unlimited). This is an integer.\n- `max_batch_bytes`: maximum number of bytes to wait for before batching a result set (default: 1MB). This is an integer.\n- `max_batch_seconds`: maximum number of seconds to wait before batching a result set (default: 0.5). This is a float (not an integer) and may be specified to the microsecond.\n- `first_batch_scaledown_factor`: factor to scale the other parameters down by on the first batch (default: 4). For example, with this set to 8 and `max_batch_rows` set to 80, on the first batch `max_batch_rows` will be adjusted to 10 (80 / 8). 
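As the `repl` note above says, the default connection is an interactive convenience and application code should pass connections explicitly. A sketch of both styles; the database and table names are illustrative:

    from rethinkdb import RethinkDB

    r = RethinkDB()

    # Interactive style: register a default connection once.
    r.connect(db="marvel").repl()
    print(r.table("heroes").count().run())

    # Application style: keep the connection explicit.
    conn = r.connect(db="marvel")
    print(r.table("heroes").count().run(conn))
    conn.close()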
This allows the first batch to return faster.\n\n*Example* Run a query on the connection `conn` and print out every\nrow in the result.\n\n for doc in r.table(\'marvel\').run(conn):\n print doc\n\n*Example* If you are OK with potentially out of date data from all\nthe tables involved in this query and want potentially faster reads,\npass a flag allowing out of date data in an options object. Settings\nfor individual tables will supercede this global setting for all\ntables in the query.\n\n r.table(\'marvel\').run(conn, read_mode=\'outdated\')\n\n*Example* If you just want to send a write and forget about it, you\ncan set `noreply` to true in the options. In this case `run` will\nreturn immediately.\n\n r.table(\'marvel\').run(conn, noreply=True)\n\n*Example* If you want to specify whether to wait for a write to be\nwritten to disk (overriding the table\'s default settings), you can set\n`durability` to `\'hard\'` or `\'soft\'` in the options.\n\n r.table(\'marvel\')\n .insert({ \'superhero\': \'Iron Man\', \'superpower\': \'Arc Reactor\' })\n .run(conn, noreply=True, durability=\'soft\')\n\n*Example* If you do not want a time object to be converted to a\nnative date object, you can pass a `time_format` flag to prevent it\n(valid flags are "raw" and "native"). This query returns an object\nwith two fields (`epoch_time` and `$reql_type$`) instead of a native date\nobject.\n\n r.now().run(conn, time_format="raw")\n\n*Example* Specify the database to use for the query.\n\n for doc in r.table(\'marvel\').run(conn, db=\'heroes\'):\n print doc\n\nThis is equivalent to using the `db` command to specify the database:\n\n r.db(\'heroes\').table(\'marvel\').run(conn) ...\n\n*Example* Change the batching parameters for this query.\n\n r.table(\'marvel\').run(conn, max_batch_rows=16, max_batch_bytes=2048)\n'), - (rethinkdb.net.Connection.server, b'conn.server()\n\nReturn information about the server being used by a connection.\n\nThe `server` command returns either two or three fields:\n\n* `id`: the UUID of the server the client is connected to.\n* `proxy`: a boolean indicating whether the server is a RethinkDB proxy node.\n* `name`: the server name. If `proxy` is `True`, this field will not be returned.\n\n*Example* Return server information.\n\n > conn.server()\n \n {\n "id": "404bef53-4b2c-433f-9184-bc3f7bda4a15",\n "name": "amadeus",\n "proxy": False\n }\n'), - (rethinkdb.set_loop_type, b'r.set_loop_type(string)\n\nSet an asynchronous event loop model. There are two supported models:\n\n* `"tornado"`: use the Tornado web framework. Under this model, the connect and run commands will return Tornado `Future` objects.\n* `"twisted"`: use the Twisted networking engine. 
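A few of the `run` options listed above combined in one sketch; the `marvel` table and the local server are assumptions taken from the doc examples:

    from rethinkdb import RethinkDB

    r = RethinkDB()
    conn = r.connect(db="test")

    # Fire-and-forget write with relaxed durability.
    r.table("marvel").insert({"superhero": "Iron Man"}).run(
        conn, durability="soft", noreply=True
    )

    # Read that tolerates out-of-date replicas and uses smaller batches.
    for doc in r.table("marvel").run(conn, read_mode="outdated", max_batch_rows=16):
        print(doc)

    conn.close()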
Under this model, the connect and run commands will return Twisted `Deferred` objects.\n\n*Example* Read a table\'s data using Tornado.\n\n r.set_loop_type("tornado")\n conn = r.connect(host=\'localhost\', port=28015)\n \n @gen.coroutine\n def use_cursor(conn):\n # Print every row in the table.\n cursor = yield r.table(\'test\').order_by(index="id").run(yield conn)\n while (yield cursor.fetch_next()):\n item = yield cursor.next()\n print(item)\n\nFor a longer discussion with both Tornado and Twisted examples, see the documentation article on Asynchronous connections.\n\n'), - (rethinkdb.net.Connection.use, b"conn.use(db_name)\n\nChange the default database on this connection.\n\n*Example* Change the default database so that we don't need to\nspecify the database when referencing a table.\n\n conn.use('marvel')\n r.table('heroes').run(conn) # refers to r.db('marvel').table('heroes')\n"), - (rethinkdb.ast.Table.config, b'table.config() -> selection<object>\ndatabase.config() -> selection<object>\n\nQuery (read and/or update) the configurations for individual tables or databases.\n\nThe `config` command is a shorthand way to access the `table_config` or `db_config` [System tables](http://rethinkdb.com/docs/system-tables/#configuration-tables). It will return the single row from the system that corresponds to the database or table configuration, as if [get](http://rethinkdb.com/api/python/get) had been called on the system table with the UUID of the database or table in question.\n\n*Example* Get the configuration for the `users` table.\n\n r.table(\'users\').config().run(conn)\n\n\n\nExample return:\n\n \n {\n "id": "31c92680-f70c-4a4b-a49e-b238eb12c023",\n "name": "users",\n "db": "superstuff",\n "primary_key": "id",\n "shards": [\n {\n "primary_replica": "a",\n "replicas": ["a", "b"],\n "nonvoting_replicas": []\n },\n {\n "primary_replica": "d",\n "replicas": ["c", "d"],\n "nonvoting_replicas": []\n }\n ],\n "indexes": [],\n "write_acks": "majority",\n "durability": "hard"\n }\n\n*Example* Change the write acknowledgement requirement of the `users` table.\n\n r.table(\'users\').config().update({\'write_acks\': \'single\'}).run(conn)\n'), - (rethinkdb.grant, b'r.grant("username", {"permission": bool[, ...]}) -> object\ndb.grant("username", {"permission": bool[, ...]}) -> object\ntable.grant("username", {"permission": bool[, ...]}) -> object\n\nGrant or deny access permissions for a user account, globally or on a per-database or per-table basis.\n\nThere are four different permissions that can be granted to an account:\n\n* `read` allows reading the data in tables.\n* `write` allows modifying data, including inserting, replacing/updating, and deleting.\n* `connect` allows a user to open HTTP connections via the http command. This permission can only be granted in global scope.\n* `config` allows users to create/drop secondary indexes on a table and changing the cluster configuration; to create and drop tables, if granted on a database; and to create and drop databases, if granted globally.\n\nPermissions may be granted on a global scope, or granted for a specific table or database. 
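The `use` and `config` entries above compose naturally. A brief sketch, assuming a `marvel` database with a `heroes` table:

    from rethinkdb import RethinkDB

    r = RethinkDB()
    conn = r.connect(host="localhost", port=28015)

    conn.use("marvel")                           # default db for this connection
    print(r.table("heroes").config().run(conn))  # table configuration row
    print(r.db("marvel").config().run(conn))     # database configuration row

    conn.close()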
The scope is defined by calling `grant` on its own (e.g., `r.grant()`, on a table (`r.table().grant()`), or on a database (`r.db().grant()`).\n\nThe `grant` command returns an object of the following form:\n\n {\n "granted": 1,\n "permissions_changes": [\n {\n "new_val": { new permissions },\n "old_val": { original permissions }\n }\n ]\n\nThe `granted` field will always be `1`, and the `permissions_changes` list will have one object, describing the new permissions values and the old values they were changed from (which may be `None`).\n\nPermissions that are not defined on a local scope will be inherited from the next largest scope. For example, a write operation on a table will first check if `write` permissions are explicitly set to `True` or `False` for that table and account combination; if they are not, the `write` permissions for the database will be used if those are explicitly set; and if neither table nor database permissions are set for that account, the global `write` permissions for that account will be used.\n\n__Note:__ For all accounts other than the special, system-defined `admin` account, permissions that are not explicitly set in any scope will effectively be `False`. When you create a new user account by inserting a record into the system table, that account will have _no_ permissions until they are explicitly granted.\n\nFor a full description of permissions, read Permissions and user accounts.\n\n*Example* Grant the `chatapp` user account read and write permissions on the `users` database.\n\n > r.db(\'users\').grant(\'chatapp\', {\'read\': True, \'write\': True}).run(conn)\n \n {\n "granted": 1,\n "permissions_changes": [\n {\n "new_val": { "read": true, "write": true },\n "old_val": { null }\n }\n ]\n\n*Example* Deny write permissions from the `chatapp` account for the `admin` table.\n\n r.db(\'users\').table(\'admin\').grant(\'chatapp\', {\'write\': False}).run(conn)\n\nThis will override the `write: true` permissions granted in the first example, but for this table only. Other tables in the `users` database will inherit from the database permissions.\n\n*Example* Delete a table-level permission for the `chatapp` account.\n\n r.db(\'users\').table(\'admin\').grant(\'chatapp\', {\'write\': None}).run(conn)\n\nBy specifying `None`, the table scope `write` permission is removed, and will again inherit from the next highest scope (database or global).\n\n*Example* Grant `chatapp` the ability to use HTTP connections.\n\n r.grant(\'chatapp\', {\'connect\': True}).run(conn)\n\nThis grant can only be given on a global level.\n\n*Example* Grant a `monitor` account read-only access to all databases.\n\n r.grant(\'monitor\', {\'read\': True}).run(conn)\n'), - (rethinkdb.ast.Table.rebalance, b'table.rebalance() -> object\ndatabase.rebalance() -> object\n\nRebalances the shards of a table. When called on a database, all the tables in that database will be rebalanced.\n\nThe `rebalance` command operates by measuring the distribution of primary keys within a table and picking split points that will give each shard approximately the same number of documents. 
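Following the note above that a newly created account starts with no permissions, a sketch of creating a user through the system table and then granting scoped permissions; the account name, password, and database names are illustrative:

    from rethinkdb import RethinkDB

    r = RethinkDB()
    conn = r.connect(db="test")  # connect as admin

    # New accounts are rows in the system users table and start with no permissions.
    r.db("rethinkdb").table("users").insert(
        {"id": "chatapp", "password": "secret"}
    ).run(conn)

    r.db("users").grant("chatapp", {"read": True, "write": True}).run(conn)
    r.db("users").table("admin").grant("chatapp", {"write": False}).run(conn)

    conn.close()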
It won\'t change the number of shards within a table, or change any other configuration aspect for the table or the database.\n\nA table will lose availability temporarily after `rebalance` is called; use the [wait](http://rethinkdb.com/api/python/wait) command to wait for the table to become available again, or [status](http://rethinkdb.com/api/python/status) to check if the table is available for writing.\n\nRethinkDB automatically rebalances tables when the number of shards are increased, and as long as your documents have evenly distributed primary keys—such as the default UUIDs—it is rarely necessary to call `rebalance` manually. Cases where `rebalance` may need to be called include:\n\n* Tables with unevenly distributed primary keys, such as incrementing integers\n* Changing a table\'s primary key type\n* Increasing the number of shards on an empty table, then using non-UUID primary keys in that table\n\nThe [web UI](http://rethinkdb.com/docs/administration-tools/) (and the [info](http://rethinkdb.com/api/python/info) command) can be used to tell you when a table\'s shards need to be rebalanced.\n\nThe return value of `rebalance` is an object with two fields:\n\n* `rebalanced`: the number of tables rebalanced.\n* `status_changes`: a list of new and old table status values. Each element of the list will be an object with two fields:\n * `old_val`: The table\'s [status](http://rethinkdb.com/api/python/status) value before `rebalance` was executed. \n * `new_val`: The table\'s `status` value after `rebalance` was executed. (This value will almost always indicate the table is unavailable.)\n\nSee the [status](http://rethinkdb.com/api/python/status) command for an explanation of the objects returned in the `old_val` and `new_val` fields.\n\n*Example* Rebalance a table.\n\n r.table(\'superheroes\').rebalance().run(conn)\n\n\n\nExample return:\n\n {\n "rebalanced": 1,\n "status_changes": [\n {\n "old_val": {\n "db": "database",\n "id": "5cb35225-81b2-4cec-9eef-bfad15481265",\n "name": "superheroes",\n "shards": [\n {\n "primary_replica": "jeeves",\n "replicas": [\n {\n "server": "jeeves",\n "state": "ready"\n }\n ]\n },\n {\n "primary_replica": "jeeves",\n "replicas": [\n {\n "server": "jeeves",\n "state": "ready"\n }\n ]\n }\n ],\n "status": {\n "all_replicas_ready": True,\n "ready_for_outdated_reads": True,\n "ready_for_reads": True,\n "ready_for_writes": True\n }\n },\n "new_val": {\n "db": "database",\n "id": "5cb35225-81b2-4cec-9eef-bfad15481265",\n "name": "superheroes",\n "shards": [\n {\n "primary_replica": "jeeves",\n "replicas": [\n {\n "server": "jeeves",\n "state": "transitioning"\n }\n ]\n },\n {\n "primary_replica": "jeeves",\n "replicas": [\n {\n "server": "jeeves",\n "state": "transitioning"\n }\n ]\n }\n ],\n "status": {\n "all_replicas_ready": False,\n "ready_for_outdated_reads": False,\n "ready_for_reads": False,\n "ready_for_writes": False\n }\n }\n \n }\n ]\n }\n'), - (rethinkdb.ast.Table.reconfigure, b'table.reconfigure(shards=, replicas=[, primary_replica_tag=, dry_run=False, nonvoting_replica_tags=None]) -> object\ndatabase.reconfigure(shards=, replicas=[, primary_replica_tag=, dry_run=False, nonvoting_replica_tags=None]) -> object\ntable.reconfigure(emergency_repair=