Skip to content

gather_job_data: detect specific before generic #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/gather_job_data/_includes/testrun_address_already_in_use.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
2022-08-25T11:18:47.3246765Z [051] box-py/bootstrap.test.py
2022-08-25T11:18:47.3247301Z [051] [Instance "box"] Failed to start
2022-08-25T11:18:47.3247697Z [051]
2022-08-25T11:18:47.3248302Z [051] Last 15 lines of Tarantool Log file [Instance "box"][/tmp/t/051_box-py/box.log]:
2022-08-25T11:18:47.3248946Z [051] Run console at unix/:/tmp/t/051_box-py/box.control
2022-08-25T11:18:47.3249565Z [051] tcp_server: remove dead UNIX socket: /tmp/t/051_box-py/box.control
2022-08-25T11:18:47.3250131Z [051] started
2022-08-25T11:18:47.3250774Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> Tarantool 2.11.0-entrypoint-404-gfbfa5aaf1
2022-08-25T11:18:47.3251377Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> log level 5
2022-08-25T11:18:47.3265325Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> wal/engine cleanup is paused
2022-08-25T11:18:47.3266562Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> mapping 117440512 bytes for memtx tuple arena...
2022-08-25T11:18:47.3267565Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> Actual slab_alloc_factor calculated on the basis of desired slab_alloc_factor = 1.044274
2022-08-25T11:18:47.3268475Z [051] 2022-08-25 11:18:47.236 [44218] main/103/box I> mapping 134217728 bytes for vinyl tuple arena...
2022-08-25T11:18:47.3269236Z [051] 2022-08-25 11:18:47.271 [44218] main/103/box I> update replication_synchro_quorum = 1
2022-08-25T11:18:47.3270172Z [051] 2022-08-25 11:18:47.271 [44218] main/103/box I> instance uuid 7af31831-16cc-48fe-ae08-cf3cbaa20f5b
2022-08-25T11:18:47.3271127Z [051] 2022-08-25 11:18:47.272 [44218] main/103/box evio.c:369 E> tx_binary: failed to bind on 127.0.0.1:41608: bind, called on fd 22, aka 0.0.0.0:0: Address already in use
2022-08-25T11:18:47.3272079Z [051] 2022-08-25 11:18:47.273 [44218] main/103/box evio.c:375 E> SocketError: tx_binary: failed to bind, called on fd -1: Address already in use
2022-08-25T11:18:47.3273095Z [051] 2022-08-25 11:18:47.273 [44218] main/103/box F> can't initialize storage: tx_binary: failed to bind, called on fd -1: Address already in use
2022-08-25T11:18:47.3274062Z [051] 2022-08-25 11:18:47.273 [44218] main/103/box F> can't initialize storage: tx_binary: failed to bind, called on fd -1: Address already in use
2022-08-25T11:18:47.3274563Z [051] [ fail ]
4 changes: 2 additions & 2 deletions multivac/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import glob
import os

from sensors.failures import failure_specs
from sensors.failures import specific_failures, generic_failures

log_examples_path = 'docs/gather_job_data/'

Expand Down Expand Up @@ -45,7 +45,7 @@ def describe_failure_type(failure_spec: dict, heading_marker='-'):
)

failure_specs_sorted = list(sorted(
failure_specs, key=lambda x: x['type']
generic_failures + specific_failures, key=lambda x: x['type']
))
for failure_spec in failure_specs_sorted:
failures_rst.writelines(describe_failure_type(failure_spec))
17 changes: 9 additions & 8 deletions multivac/gather_job_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import sys

from sensors.failures import failure_specs
from sensors.failures import specific_failures, generic_failures, compile_failure_specs

PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(PROJECT_DIR)
Expand Down Expand Up @@ -54,7 +54,7 @@ def reverse_readline(filename, buf_size=8192):
yield segment


def detect_error(logs: str) -> (str, str):
def detect_error(logs: str, failure_specs: list) -> (str, str):
for number, line in enumerate(reverse_readline(logs)):
# check if the line matches one of regular expressions:
for failure_type in failure_specs:
Expand Down Expand Up @@ -124,7 +124,9 @@ def gather_data(self):
print(f'no logs for job {job_id}')

if job['conclusion'] == 'failure':
job_failure_type, failure_line = detect_error(logs)
job_failure_type, failure_line = detect_error(logs, specific_failures)
if job_failure_type == 'unknown':
job_failure_type, failure_line = detect_error(logs, generic_failures)
if job_failure_type == self.watch_failure:
print(
f'{job_id} {job["name"]}\t'
Expand Down Expand Up @@ -222,12 +224,11 @@ def print_failure_stats(self):
args = parser.parse_args()

# compile regular expressions
for failure_type in failure_specs:
failure_type.update(
{'re_compiled': [re.compile(expression) for expression in failure_type['re']]}
)
compile_failure_specs(specific_failures)
compile_failure_specs(generic_failures)

results = {failure_type['type']: 0 for failure_type in failure_specs}
results = {failure_type['type']: 0 for failure_type in generic_failures}
results.update({failure_type['type']: 0 for failure_type in specific_failures})
results.update({'unknown': 0})
results.update({'total': 0})

Expand Down
27 changes: 24 additions & 3 deletions multivac/sensors/failures.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

failure_categories = [
{
'tag': 'git',
Expand All @@ -20,7 +22,21 @@
'description': 'Internal Tarantool problem',
}
]
failure_specs = [

# Failure specs for narrowly-scoped error signatures.  When a failed job's log
# is classified, these are tried first; ``generic_failures`` is consulted only
# if none of them matches (i.e. the result is still 'unknown').
# Each spec carries a 'type' name, a list of regex strings under 're', and a
# 'description' text.
specific_failures = [
    {
        'type': 'testrun_address_already_in_use',
        're': [r'.*Address already in use.*'],
        'description': (
            'Test-run error with managing sockets. '
            'See `tarantool/test-run#141 '
            '<https://github.com/tarantool/test-run/issues/141>`__'
        ),
    },
]

generic_failures = [
{
'type': 'git_unsafe_error',
're': [r'.*fatal: unsafe repository.*'],
Expand Down Expand Up @@ -84,7 +100,6 @@
're': [r'.*Test hung!.*'],
'description': 'Test had started but did not return results.',
},

{
'type': 'curl_error',
're': [r'.*curl: \(22\) The requested URL returned error:.*'],
Expand Down Expand Up @@ -131,7 +146,6 @@
're': [r'.*dpkg-buildpackage: error: debian/rules build.*'],
'description': '',
},

{
'type': 'git_submodule_error',
're': [r'.*fatal: Needed a single revision.*'],
Expand Down Expand Up @@ -238,3 +252,10 @@
'description': 'https://stackoverflow.com/questions/26734777/',
},
]


def compile_failure_specs(failure_specs):
    """Pre-compile the regular expressions of every failure spec, in place.

    Each spec is a dict whose ``'re'`` entry is a list of pattern strings.
    The compiled patterns are stored, in the same order, under the
    ``'re_compiled'`` key of that same dict; any earlier value under that
    key is overwritten.  Nothing is returned.

    :param failure_specs: iterable of failure spec dicts to update.
    :raises KeyError: if a spec has no ``'re'`` entry.
    :raises re.error: if a pattern is not a valid regular expression.
    """
    for failure_type in failure_specs:
        # Plain item assignment: no throwaway dict just to set one key.
        failure_type['re_compiled'] = [
            re.compile(expression) for expression in failure_type['re']
        ]