Skip to content

Commit 289ddb9

Browse files
committed
remerge of a part of the windows restore
over-the-shoulder by @danielmewes
1 parent 984e935 commit 289ddb9

File tree

2 files changed

+169
-156
lines changed

2 files changed

+169
-156
lines changed

rethinkdb/_import.py

Lines changed: 71 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,8 @@ def read_to_queue(self, work_queue, exit_event, error_queue, warning_queue, timi
317317
break
318318
except Full:
319319
pass
320+
else:
321+
break
320322
timing_queue.put(('reader_wait', time.time() - timePoint))
321323
timePoint = time.time()
322324

@@ -810,7 +812,7 @@ def abort_import(pools, exit_event, interrupt_event):
810812
worker.terminate()
811813
worker.join(.1)
812814
else:
813-
print("\nTerminate signal seen, aborting")
815+
print("\nTerminate signal seen, aborting gracefully")
814816
interrupt_event.set()
815817
exit_event.set()
816818

@@ -873,6 +875,9 @@ def import_tables(options, sources):
873875
interrupt_event = multiprocessing.Event()
874876

875877
timing_queue = SimpleQueue()
878+
879+
errors = []
880+
warnings = []
876881
timingSums = {}
877882

878883
pools = []
@@ -882,6 +887,24 @@ def import_tables(options, sources):
882887
# - setup KeyboardInterrupt handler
883888
signal.signal(signal.SIGINT, lambda a, b: abort_import(pools, exit_event, interrupt_event))
884889

890+
# - queue draining
891+
def drainQueues():
892+
# error_queue
893+
while not error_queue.empty():
894+
errors.append(error_queue.get())
895+
896+
# warning_queue
897+
while not warning_queue.empty():
898+
warnings.append(warning_queue.get())
899+
900+
# timing_queue
901+
while not timing_queue.empty():
902+
key, value = timing_queue.get()
903+
if not key in timingSums:
904+
timingSums[key] = value
905+
else:
906+
timingSums[key] += value
907+
885908
# - setup dbs and tables
886909

887910
# create missing dbs
@@ -942,7 +965,6 @@ def import_tables(options, sources):
942965
# - read the tables options.clients at a time
943966
readers = []
944967
pools.append(readers)
945-
filesLeft = len(sources)
946968
fileIter = iter(sources)
947969
try:
948970
while not exit_event.is_set():
@@ -959,40 +981,36 @@ def import_tables(options, sources):
959981
)
960982
readers.append(reader)
961983
reader.start()
962-
filesLeft -= 1
963984

964-
# drain the timing queue
965-
while not timing_queue.empty():
966-
key, value = timing_queue.get()
967-
if not key in timingSums:
968-
timingSums[key] = value
969-
else:
970-
timingSums[key] += value
985+
# drain the queues
986+
drainQueues()
971987

972988
# reap completed tasks
973989
for reader in readers[:]:
974990
if not reader.is_alive():
975991
readers.remove(reader)
976-
if filesLeft and len(readers) == options.clients:
992+
if len(readers) == options.clients:
977993
time.sleep(.05)
978994
except StopIteration:
979995
pass # ran out of new tables
980996

981997
# - wait for the last batch of readers to complete
982998
while readers:
983-
# drain the timing queue
984-
while not timing_queue.empty():
985-
key, value = timing_queue.get()
986-
if not key in timingSums:
987-
timingSums[key] = value
988-
else:
989-
timingSums[key] += value
999+
# drain the queues
1000+
drainQueues()
1001+
1002+
# drain the work queue to prevent readers from stalling on exit
1003+
if exit_event.is_set():
1004+
try:
1005+
while True:
1006+
work_queue.get(timeout=0.1)
1007+
except Empty: pass
9901008

9911009
# watch the readers
9921010
for reader in readers[:]:
993-
if exit_event.is_set():
994-
reader.terminate() # kill it abruptly
995-
reader.join(.1)
1011+
try:
1012+
reader.join(.1)
1013+
except Exception: pass
9961014
if not reader.is_alive():
9971015
readers.remove(reader)
9981016

@@ -1018,7 +1036,7 @@ def import_tables(options, sources):
10181036
try:
10191037
writer.terminate()
10201038
except Exception: pass
1021-
1039+
10221040
# - stop the progress bar
10231041
if progressBar:
10241042
done_event.set()
@@ -1028,10 +1046,8 @@ def import_tables(options, sources):
10281046
if progressBar.is_alive():
10291047
progressBar.terminate()
10301048

1031-
# - drain the error_queue
1032-
errors = []
1033-
while not error_queue.empty():
1034-
errors.append(error_queue.get())
1049+
# - drain queues
1050+
drainQueues()
10351051

10361052
# - final reporting
10371053
if not options.quiet:
@@ -1054,33 +1070,35 @@ def import_tables(options, sources):
10541070
finally:
10551071
signal.signal(signal.SIGINT, signal.SIG_DFL)
10561072

1073+
drainQueues()
1074+
1075+
for error in errors:
1076+
print("%s" % error.message, file=sys.stderr)
1077+
if options.debug and error.traceback:
1078+
print(" Traceback:\n%s" % error.traceback, file=sys.stderr)
1079+
if len(error.file) == 4:
1080+
print(" In file: %s" % error.file, file=sys.stderr)
1081+
1082+
for warning in warnings:
1083+
print("%s" % warning[1], file=sys.stderr)
1084+
if options.debug:
1085+
print("%s traceback: %s" % (warning[0].__name__, warning[2]), file=sys.stderr)
1086+
if len(warning) == 4:
1087+
print("In file: %s" % warning[3], file=sys.stderr)
1088+
10571089
if interrupt_event.is_set():
10581090
raise RuntimeError("Interrupted")
1059-
1060-
if len(errors) != 0:
1061-
for error in errors:
1062-
print("%s" % error.message, file=sys.stderr)
1063-
if options.debug and error.traceback:
1064-
print(" Traceback:\n%s" % error.traceback, file=sys.stderr)
1065-
if len(error.file) == 4:
1066-
print(" In file: %s" % error.file, file=sys.stderr)
1091+
if errors:
10671092
raise RuntimeError("Errors occurred during import")
1068-
1069-
if not warning_queue.empty():
1070-
while not warning_queue.empty():
1071-
warning = warning_queue.get()
1072-
print("%s" % warning[1], file=sys.stderr)
1073-
if options.debug:
1074-
print("%s traceback: %s" % (warning[0].__name__, warning[2]), file=sys.stderr)
1075-
if len(warning) == 4:
1076-
print("In file: %s" % warning[3], file=sys.stderr)
1093+
if warnings:
10771094
raise RuntimeError("Warnings occurred during import")
10781095

1079-
def import_directory(options):
1096+
def import_directory(options, files_ignored=None):
10801097
# Scan for all files, make sure no duplicated tables with different formats
10811098
dbs = False
10821099
sources = {} # (db, table) => {file:, format:, db:, table:, info:}
1083-
files_ignored = []
1100+
if files_ignored is None:
1101+
files_ignored = []
10841102
for root, dirs, files in os.walk(options.directory):
10851103
if not dbs:
10861104
files_ignored.extend([os.path.join(root, f) for f in files])
@@ -1104,7 +1122,7 @@ def import_directory(options):
11041122
table, ext = os.path.splitext(filename)
11051123
table = os.path.basename(table)
11061124

1107-
if ext not in [".json", ".csv", ".info"]:
1125+
if ext not in (".json", ".csv", ".info"):
11081126
files_ignored.append(os.path.join(root, filename))
11091127
elif ext == ".info":
11101128
pass # Info files are included based on the data files
@@ -1137,7 +1155,13 @@ def import_directory(options):
11371155
except OSError:
11381156
files_ignored.append(os.path.join(root, f))
11391157

1140-
tableType = JsonSourceFile if ext == ".json" else CsvSourceFile
1158+
tableType = None
1159+
if ext == ".json":
1160+
tableType = JsonSourceFile
1161+
elif ext == ".csv":
1162+
tableType = CsvSourceFile
1163+
else:
1164+
raise Exception("The table type is not recognised: %s" % ext)
11411165
sources[(db, table)] = tableType(
11421166
source=path,
11431167
db=db, table=table,

0 commit comments

Comments (0)