Skip to content

Commit 974dbcc

Browse files
author
Sergei Voronezhskii
committed
List task for hung workers
If HungListener finds hung tests, it prints the following for each of them: the worker id, the test name, the test params, and the last 15 lines of the .result file. Closes #107
1 parent 822eed3 commit 974dbcc

File tree

3 files changed

+71
-12
lines changed

3 files changed

+71
-12
lines changed

lib/test.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytap13
1010
import pprint
1111
import shutil
12+
from functools import partial
1213

1314
try:
1415
from cStringIO import StringIO
@@ -84,28 +85,35 @@ def flush(self):
8485
self.stream.flush()
8586

8687

88+
# Matches the '.test' marker and everything that follows it
# (for example '.test.lua'). Compiled once instead of on every call.
_TEST_SUFFIX_RE = re.compile(r'\.test.*')


def get_filename_by_test(postfix, test_name):
    """Return the base file name of a test with its suffix replaced.

    Everything from the first '.test' of the file name onwards is
    replaced by `postfix`, e.g. ('.result', 'box/foo.test.lua') gives
    'foo.result'. A name that contains no '.test' is returned as its
    base name, unchanged.

    :param postfix: replacement suffix, such as '.result'
    :param test_name: test name, optionally with leading directories
    :return: file name without any directory component
    """
    # Strip the directories first so that a '.test' inside a directory
    # name cannot swallow the file name itself.
    return _TEST_SUFFIX_RE.sub(postfix, os.path.basename(test_name))


# Shortcuts for the companion files of a test.
get_reject = partial(get_filename_by_test, '.reject')
get_result = partial(get_filename_by_test, '.result')
get_skipcond = partial(get_filename_by_test, '.skipcond')
96+
97+
8798
class Test:
8899
"""An individual test file. A test object can run itself
89100
and remembers completion state of the run.
90101
91102
If file <test_name>.skipcond is exists it will be executed before
92103
test and if it sets self.skip to True value the test will be skipped.
93104
"""
94-
rg = re.compile('\.test.*')
95105

96106
def __init__(self, name, args, suite_ini, params={}, conf_name=None):
97107
"""Initialize test properties: path to test file, path to
98108
temporary result file, path to the client program, test status."""
99109
self.name = name
100110
self.args = args
101111
self.suite_ini = suite_ini
102-
self.result = os.path.join(suite_ini['suite'],
103-
os.path.basename(self.rg.sub('.result', name)))
104-
self.skip_cond = os.path.join(suite_ini['suite'],
105-
os.path.basename(self.rg.sub('.skipcond', name)))
112+
self.result = os.path.join(suite_ini['suite'], get_result(name))
113+
self.skip_cond = os.path.join(suite_ini['suite'], get_skipcond(name))
106114
self.tmp_result = os.path.join(self.suite_ini['vardir'],
107115
os.path.basename(self.result))
108-
self.reject = self.rg.sub('.reject', name)
116+
self.reject = get_reject(name)
109117
self.is_executed = False
110118
self.is_executed_ok = None
111119
self.is_equal_result = None

lib/worker.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import lib
1010
from lib.utils import safe_makedirs
1111
from lib.test_suite import TestSuite
12-
12+
from lib.test import get_result
1313
from lib.colorer import color_stdout, color_log
1414
from lib.tarantool_server import TarantoolServer
1515

@@ -157,6 +157,19 @@ def __init__(self, worker_id, worker_name):
157157
super(WorkerDone, self).__init__(worker_id, worker_name)
158158

159159

160+
class WorkerCurrentTask(BaseWorkerMessage):
    """Message describing the task a worker is currently running.

    It lets the receiver inspect the `.result` file of a hung test and
    collect the tasks running in parallel, to show which parallel tests
    could have affected a failed one.

    Besides the worker id and name passed to the base message class, it
    stores the task name, the task parameter and the path to the task's
    result file.
    """

    def __init__(self, worker_id, worker_name,
                 task_name, task_param, task_result_filepath):
        super(WorkerCurrentTask, self).__init__(worker_id, worker_name)
        # Path to the result file of the task.
        self.task_result_filepath = task_result_filepath
        # The two parts that identify the task.
        self.task_param = task_param
        self.task_name = task_name
172+
160173
# Worker
161174
########
162175

@@ -176,6 +189,13 @@ def wrap_output(self, output, log_only):
176189
def done_marker(self):
177190
return WorkerDone(self.id, self.name)
178191

192+
def current_task(self, task_id):
    """Build a WorkerCurrentTask message for the given task.

    `task_id` is unpacked as a (task name, task param) pair. The result
    file path is the suite's 'vardir' joined with the result file name
    derived from the task name.
    """
    name, param = task_id
    vardir = self.suite.ini['vardir']
    result_path = os.path.join(vardir, get_result(name))
    return WorkerCurrentTask(self.id, self.name, name, param, result_path)
198+
179199
def wrap_result(self, task_id, short_status):
180200
return WorkerTaskResult(self.id, self.name, task_id, short_status)
181201

@@ -285,6 +305,8 @@ def run_loop(self, task_queue, result_queue):
285305
schema='test_var')
286306
self.stop_worker(task_queue, result_queue)
287307
break
308+
309+
result_queue.put(self.current_task(task_id))
288310
short_status = self.run_task(task_id)
289311
result_queue.put(self.wrap_result(task_id, short_status))
290312
if not lib.Options().args.is_force and short_status == 'fail':
@@ -307,7 +329,9 @@ def run_all(self, task_queue, result_queue):
307329

308330
try:
309331
self.run_loop(task_queue, result_queue)
310-
except (KeyboardInterrupt, Exception):
332+
except (KeyboardInterrupt, Exception) as e:
333+
if not isinstance(e, KeyboardInterrupt):
334+
color_stdout('Exception: %s\n' % e, schema='error')
311335
self.stop_worker(task_queue, result_queue, cleanup=False)
312336

313337
result_queue.put(self.done_marker())

listeners.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import lib
77
from lib.worker import get_reproduce_file
8-
from lib.worker import WorkerOutput, WorkerDone, WorkerTaskResult
8+
from lib.worker import WorkerOutput, WorkerDone, WorkerTaskResult, WorkerCurrentTask
99
from lib.colorer import color_stdout
1010

1111

@@ -178,28 +178,55 @@ def __init__(self, get_not_done_worker_ids, kill_all_workers, warn_timeout,
178178
self.kill_timeout = kill_timeout
179179
self.warned_seconds_ago = 0.0
180180
self.inactivity = 0.0
181+
self.worker_current_task = dict()
181182

182183
def process_result(self, obj):
    """Reset the inactivity counters and track current worker tasks.

    Any received object counts as activity. A WorkerCurrentTask is
    remembered under its worker id; a WorkerDone drops the entry
    stored for that worker, if there is one.
    """
    self.inactivity = 0.0
    self.warned_seconds_ago = 0.0

    if isinstance(obj, WorkerCurrentTask):
        self.worker_current_task[obj.worker_id] = obj

    if isinstance(obj, WorkerDone):
        # pop() with a default is a no-op for an unknown worker id.
        self.worker_current_task.pop(obj.worker_id, None)
193+
194+
186195
def process_timeout(self, delta_seconds):
    """Account for a period without results; warn, then abort on hang.

    `delta_seconds` is added to both the time since the last warning
    and the total inactivity. Once the former reaches `warn_timeout`,
    a warning is printed together with every remembered current task
    and the last 15 lines of its result file. Once the total inactivity
    reaches `kill_timeout`, all workers are killed.

    :param delta_seconds: number of seconds to add to the counters
    :raises HangError: after kill_all_workers() has been called
    """
    self.warned_seconds_ago += delta_seconds
    self.inactivity += delta_seconds
    # NOTE(review): the ids are no longer part of the warning text; the
    # call is kept because it is unclear from here whether the callback
    # has side effects -- confirm and drop it if it has none.
    worker_ids = self.get_not_done_worker_ids()

    if self.warned_seconds_ago < self.warn_timeout:
        return

    color_stdout("No output during %d seconds. "
                 "Will abort after %d seconds without output. "
                 "List of workers not reporting the status:\n" % (
                     self.inactivity, self.kill_timeout),
                 schema='test_var')

    # values() exists on both Python 2 and 3 (itervalues() is Python 2
    # only); sorting by worker id keeps the report order stable.
    reported_tasks = sorted(self.worker_current_task.values(),
                            key=lambda task: task.worker_id)
    for current_task in reported_tasks:
        color_stdout("[{0:03d}] {1} {2}\n".format(current_task.worker_id,
                                                  current_task.task_name,
                                                  current_task.task_param),
                     schema='test_var')
        color_stdout("Last 15 lines of result file "
                     "[{0}]\n".format(current_task.task_result_filepath),
                     schema='error')
        lib.utils.print_tail_n(current_task.task_result_filepath,
                               num_lines=15)

    # Start a new warning interval; total inactivity keeps accumulating.
    self.warned_seconds_ago = 0.0

    if self.inactivity < self.kill_timeout:
        return

    color_stdout('\n[Main process] No output from workers. '
                 'It seems that we hang. Send SIGKILL to workers; '
                 'exiting...\n',
                 schema='test_var')
    self.kill_all_workers()

    raise HangError()

0 commit comments

Comments
 (0)