-
Notifications
You must be signed in to change notification settings - Fork 1
Feature/file based filter todo1 #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/file-based-filter
Are you sure you want to change the base?
Changes from 6 commits
dd53ee5
912dec4
3c26cac
4453cdd
b6b0b30
e17af2e
08481e0
6b098e8
7d44b94
5823908
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -836,15 +836,15 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): | |
aquery_output.actions, | ||
timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. #TODO test timeout--if it doesn't work, cap input length here, before passing in array. Might also want to divide timeout/cap by #targets | ||
) | ||
# Collect outputs, tolerating any timeouts | ||
outputs = [] | ||
try: | ||
for output in output_iterator: | ||
outputs.append(output) | ||
except concurrent.futures.TimeoutError: | ||
assert focused_on_file, "Timeout should only have been set in the fast, interactive --file mode, focused on a single file." | ||
if not found_header_focused_upon.is_set(): | ||
log_warning(f""">>> Timed out looking for a command for {focused_on_file} | ||
# Collect outputs, tolerating any timeouts | ||
outputs = [] | ||
try: | ||
for output in output_iterator: | ||
outputs.append(output) | ||
except concurrent.futures.TimeoutError: | ||
assert focused_on_file, "Timeout should only have been set in the fast, interactive --file mode, focused on a single file." | ||
if not found_header_focused_upon.is_set(): | ||
log_warning(f""">>> Timed out looking for a command for {focused_on_file} | ||
If that's a source file, please report this. We should work to improve the performance. | ||
If that's a header file, we should probably do the same, but it may be unavoidable.""") | ||
|
||
|
@@ -979,20 +979,52 @@ def _get_commands(target: str, flags: str): | |
|
||
|
||
# Then, actually query Bazel's compile actions for that configured target | ||
target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens | ||
target_statement = f"deps('{target}')" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor, but as in the TODO, we should really be quoting targets everywhere! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also quote in other queries. But I found another problem in bazel query. |
||
compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? | ||
if file_flags: | ||
file_path = file_flags[0] | ||
# Normalize file_path into 2 types of relative paths. | ||
# Source file. 1. <WORKSPACE_ROOT>/srcs/file 2. <OUTPUT_BASE>/external/<REPO>/srcs/file | ||
# Genfiles or output file. <EXECUTION_ROOT>/bazel-out/<configurations>/bin/srcs/file | ||
|
||
pure_path = pathlib.PurePath(file_path) | ||
|
||
if pure_path.is_absolute(): | ||
workspace_root = subprocess.check_output(["bazel", "info", "workspace"], stderr=subprocess. DEVNULL).decode("utf-8").strip() | ||
output_base = subprocess.check_output(["bazel", "info", "output_base"], stderr=subprocess. DEVNULL).decode("utf-8").strip() | ||
execution_root = subprocess.check_output(["bazel", "info", "execution_root"], stderr=subprocess. DEVNULL).decode("utf-8").strip() | ||
|
||
if pure_path.is_relative_to(workspace_root): | ||
file_path = str(pure_path.relative_to(workspace_root)) | ||
# Execution_root is longer then output_base | ||
elif pure_path.is_relative_to(execution_root): | ||
file_path = str(pure_path.relative_to(execution_root)) | ||
elif pure_path.is_relative_to(output_base): | ||
file_path = str(pure_path.relative_to(output_base)) | ||
else: | ||
# Treat pure_path as system absolute path | ||
pass | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Really appreciate your tackling. But on a quick read, I think this part could use some polish along the lines we'd discussed.
Probably also worth using subprocess.run instead of subprocess.check_output, just because of deprecation and to match the others? (Also, there's the encoding issue.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would like to ask if there are any other problems with the processing of paths besides subprocess.check_output? At present, I have modified these 3 calls, but I have not changed other places in the file |
||
|
||
found = False | ||
target_statement_candidates = [] | ||
if file_path.endswith(_get_files.source_extensions): | ||
target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") | ||
else: | ||
fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file. Use outputs() aquery function. Should also test the case where the file is generated/is coming in as a filegroup--that's likely to be fixed by this change. | ||
header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. | ||
middle_target_candidates = subprocess.Popen(f"bazel aquery 'outputs('{re.escape(file_path)}', {target_statement})' | grep -o 'Target: .*' | grep -o '[@/].*'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL).communicate()[0].decode("utf-8").strip().splitlines() | ||
if middle_target_candidates: | ||
log_info(f">>> File detected as intermediate of {middle_target_candidates}") | ||
target_statement_candidates.extend([ | ||
# TODO there seems a bug in bazel aquery? There are many targets listed output when executing bazel query "somepath('TARGET', 'SUBTARGET')" but there isn't any actions output when executing bazel aquery "somepath('TARGET', 'SUBTARGET')" and it works when executing bazel aquery "<somepath of target>" | ||
f"allpaths({target}, {'+'.join(middle_target_candidates)})" | ||
]) | ||
else: | ||
fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file. Use outputs() aquery function. Should also test the case where the file is generated/is coming in as a filegroup--that's likely to be fixed by this change. | ||
header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. | ||
target_statement_candidates.extend([ | ||
header_target_statement, | ||
f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 and https://github.com/bazelbuild/bazel/issues/16310 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, <maybe some limited depth>) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. | ||
]) | ||
target_statement_candidates.extend([ | ||
header_target_statement, | ||
f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 and https://github.com/bazelbuild/bazel/issues/16310 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, <maybe some limited depth>) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. | ||
f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. | ||
]) | ||
for target_statement in target_statement_candidates: | ||
|
Uh oh!
There was an error while loading. Please reload this page.