Skip to content

Support full function-like macro expansions and allow for whitespace in PP directives #350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion fortls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,11 @@ def locate_config(root: str) -> str | None:
error_exit(f"Reading file failed: {err_str}")
print(f" Detected format: {'fixed' if file_obj.fixed else 'free'}")
print("\n=========\nParser Output\n=========\n")
file_ast = file_obj.parse(debug=True, pp_defs=pp_defs, include_dirs=include_dirs)
file_ast = file_obj.parse(
debug=True,
pp_defs=pp_defs,
include_dirs=include_dirs,
)
print("\n=========\nObject Tree\n=========\n")
for obj in file_ast.get_scopes():
print("{}: {}".format(obj.get_type(), obj.FQSN))
Expand Down
21 changes: 17 additions & 4 deletions fortls/langserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,11 +751,18 @@ def get_definition(
return None
# Search in Preprocessor defined variables
if def_name in def_file.pp_defs:
def_value = def_file.pp_defs.get(def_name)
def_arg_str = ""
if isinstance(def_value, tuple):
def_arg_str, def_value = def_value
def_arg_str = ", ".join([x.strip() for x in def_arg_str.split(",")])
def_arg_str = f"({def_arg_str})"

var = Variable(
def_file.ast,
def_line + 1,
def_name,
f"#define {def_name} {def_file.pp_defs.get(def_name)}",
f"#define {def_name}{def_arg_str} {def_value}",
[],
)
return var
Expand Down Expand Up @@ -1316,7 +1323,9 @@ def serve_onChange(self, request: dict):
# Update inheritance (currently file only)
# tmp_file.ast.resolve_links(self.obj_tree, self.link_version)
elif file_obj.preproc:
file_obj.preprocess(pp_defs=self.pp_defs)
file_obj.preprocess(
pp_defs=self.pp_defs,
)
self.pp_defs = {**self.pp_defs, **file_obj.pp_defs}

def serve_onOpen(self, request: dict):
Expand Down Expand Up @@ -1389,7 +1398,8 @@ def update_workspace_file(
if not file_changed:
return False, None
ast_new = file_obj.parse(
pp_defs=self.pp_defs, include_dirs=self.include_dirs
pp_defs=self.pp_defs,
include_dirs=self.include_dirs,
)
# Add the included read in pp_defs from to the ones specified in the
# configuration file
Expand Down Expand Up @@ -1453,7 +1463,10 @@ def file_init(
# This is a bypass.
# For more see on SO: shorturl.at/hwAG1
set_keyword_ordering(sort)
file_ast = file_obj.parse(pp_defs=pp_defs, include_dirs=include_dirs)
file_ast = file_obj.parse(
pp_defs=pp_defs,
include_dirs=include_dirs,
)
except:
log.error("Error while parsing file %s", filepath, exc_info=True)
return "Error during parsing"
Expand Down
102 changes: 80 additions & 22 deletions fortls/parsers/internal/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,10 @@ def find_word_in_code_line(
return line_no, word_range

def preprocess(
self, pp_defs: dict = None, include_dirs: set = None, debug: bool = False
self,
pp_defs: dict = None,
include_dirs: set = None,
debug: bool = False,
) -> tuple[list, list]:
if pp_defs is None:
pp_defs = {}
Expand Down Expand Up @@ -1265,7 +1268,9 @@ def parse(
if self.preproc:
log.debug("=== PreProc Pass ===\n")
pp_skips, pp_defines = self.preprocess(
pp_defs=pp_defs, include_dirs=include_dirs, debug=debug
pp_defs=pp_defs,
include_dirs=include_dirs,
debug=debug,
)
for pp_reg in pp_skips:
file_ast.start_ppif(pp_reg[0])
Expand Down Expand Up @@ -2038,6 +2043,7 @@ def replace_ops(expr: str):
expr = expr.replace("!=", " <> ")
expr = expr.replace("!", " not ")
expr = expr.replace(" <> ", " != ")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a remainder from cherry-picking, o need for this diff.

Suggested change

return expr

def replace_defined(line: str):
Expand Down Expand Up @@ -2070,7 +2076,9 @@ def replace_vars(line: str):

if defs is None:
defs = {}
out_line = replace_defined(text)

out_line = text
out_line = replace_defined(out_line)
Comment on lines +2079 to +2081
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK these changes can be reverted

out_line = replace_vars(out_line)
try:
line_res = eval(replace_ops(out_line))
Expand Down Expand Up @@ -2098,26 +2106,27 @@ def replace_vars(line: str):
if def_cont_name is not None:
output_file.append("")
if line.rstrip()[-1] != "\\":
defs_tmp[def_cont_name] += line.strip()
append_multiline_macro(defs_tmp, def_cont_name, line.strip())
def_cont_name = None
else:
defs_tmp[def_cont_name] += line[0:-1].strip()
append_multiline_macro(defs_tmp, def_cont_name, line[0:-1].strip())

continue
# Handle conditional statements
match = FRegex.PP_REGEX.match(line)
if match:
if match and check_pp_prefix(match.group(1)):
output_file.append(line)
def_name = None
if_start = False
# Opening conditional statements
if match.group(1) == "if ":
is_path = eval_pp_if(line[match.end(1) :], defs_tmp)
if match.group(2).lower() == "if ":
is_path = eval_pp_if(line[match.end(2) :], defs_tmp)
if_start = True
elif match.group(1) == "ifdef":
elif match.group(2).lower() == "ifdef":
if_start = True
def_name = line[match.end(0) :].strip()
is_path = def_name in defs_tmp
elif match.group(1) == "ifndef":
elif match.group(2).lower() == "ifndef":
if_start = True
def_name = line[match.end(0) :].strip()
is_path = not (def_name in defs_tmp)
Expand All @@ -2135,7 +2144,7 @@ def replace_vars(line: str):
inc_start = False
exc_start = False
exc_continue = False
if match.group(1) == "elif":
if match.group(2).lower() == "elif":
if (not pp_stack_group) or (pp_stack_group[-1][0] != len(pp_stack)):
# First elif statement for this elif group
if pp_stack[-1][0] < 0:
Expand All @@ -2147,15 +2156,15 @@ def replace_vars(line: str):
exc_continue = True
if pp_stack[-1][0] < 0:
pp_stack[-1][0] = i + 1
elif eval_pp_if(line[match.end(1) :], defs_tmp):
elif eval_pp_if(line[match.end(2) :], defs_tmp):
pp_stack[-1][1] = i + 1
pp_skips.append(pp_stack.pop())
pp_stack_group[-1][1] = True
pp_stack.append([-1, -1])
inc_start = True
else:
exc_start = True
elif match.group(1) == "else":
elif match.group(2).lower() == "else":
if pp_stack[-1][0] < 0:
pp_stack[-1][0] = i + 1
exc_start = True
Expand All @@ -2171,7 +2180,7 @@ def replace_vars(line: str):
pp_skips.append(pp_stack.pop())
pp_stack.append([-1, -1])
inc_start = True
elif match.group(1) == "endif":
elif match.group(2).lower() == "endif":
if pp_stack_group and (pp_stack_group[-1][0] == len(pp_stack)):
pp_stack_group.pop()
if pp_stack[-1][0] < 0:
Expand All @@ -2192,10 +2201,12 @@ def replace_vars(line: str):
continue
# Handle variable/macro definitions files
match = FRegex.PP_DEF.match(line)
if (match is not None) and ((len(pp_stack) == 0) or (pp_stack[-1][0] < 0)):
if (match is not None and check_pp_prefix(match.group(1))) and (
(len(pp_stack) == 0) or (pp_stack[-1][0] < 0)
):
output_file.append(line)
pp_defines.append(i + 1)
def_name = match.group(2)
def_name = match.group(3)
# If this is an argument list of a function add them to the name
# get_definition will only return the function name upon hover
# hence if the argument list is appended in the def_name then
Expand All @@ -2204,18 +2215,27 @@ def replace_vars(line: str):
# This also does not allow for multiline argument list definitions.
# if match.group(3):
# def_name += match.group(3)
if (match.group(1) == "define") and (def_name not in defs_tmp):
if (match.group(2) == "define") and (def_name not in defs_tmp):
eq_ind = line[match.end(0) :].find(" ")
if eq_ind < 0:
eq_ind = line[match.end(0) :].find("\t")

Comment on lines +2220 to +2222
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is adequate support in the REGEX or generally throughout fortls. I think we should probably remove this.

if eq_ind >= 0:
# Handle multiline macros
if line.rstrip()[-1] == "\\":
defs_tmp[def_name] = line[match.end(0) + eq_ind : -1].strip()
def_value = line[match.end(0) + eq_ind : -1].strip()
def_cont_name = def_name
else:
defs_tmp[def_name] = line[match.end(0) + eq_ind :].strip()
def_value = line[match.end(0) + eq_ind :].strip()
else:
defs_tmp[def_name] = "True"
elif (match.group(1) == "undef") and (def_name in defs_tmp):
def_value = "True"

# are there arguments to parse?
if match.group(4):
def_value = (match.group(5), def_value)

defs_tmp[def_name] = def_value
elif (match.group(2) == "undef") and (def_name in defs_tmp):
defs_tmp.pop(def_name, None)
log.debug(f"{line.strip()} !!! Define statement({i + 1})")
continue
Expand Down Expand Up @@ -2265,8 +2285,16 @@ def replace_vars(line: str):
continue
def_regex = def_regexes.get(def_tmp)
if def_regex is None:
def_regex = re.compile(rf"\b{def_tmp}\b")
if isinstance(value, tuple):
def_regex = expand_def_func_macro(def_tmp, value)
else:
def_regex = re.compile(rf"\b{def_tmp}\b")

def_regexes[def_tmp] = def_regex

if isinstance(def_regex, tuple):
def_regex, value = def_regex

line_new, nsubs = def_regex.subn(value, line)
if nsubs > 0:
log.debug(
Expand All @@ -2275,3 +2303,33 @@ def replace_vars(line: str):
line = line_new
output_file.append(line)
return output_file, pp_skips, pp_defines, defs_tmp


def expand_def_func_macro(def_name: str, def_value: tuple[str, str]):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would probably rename to something like

Suggested change
def expand_def_func_macro(def_name: str, def_value: tuple[str, str]):
def expand_func_macro(def_name: str, def_value: tuple[str, str]):

def_args, sub = def_value
def_args = def_args.split(",")
regex = re.compile(rf"\b{def_name}\s*\({','.join(['(.*)']*len(def_args))}\)")

for i, arg in enumerate(def_args):
arg = arg.strip()
sub = re.sub(rf"\b({arg})\b", rf"\\{i + 1}", sub)

return regex, sub


def append_multiline_macro(pp_defs: dict, def_name: str, line: str):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to return def_value, should not be doing inplace argument replacements.
I think something like this is a bit more Pythonic

def append_multiline_macro(def_value: str | tuple, line: str):
    def_args = None
    if isinstance(def_value, tuple):
        def_args, def_value = def_value
    def_value += line
    if def_args is not None:
        return (def_args, def_value)
    return def_value

def_value = pp_defs[def_name]
def_args = None
if isinstance(def_value, tuple):
def_args, def_value = def_value

def_value += line

if def_args is not None:
def_value = (def_args, def_value)

pp_defs[def_name] = def_value
Comment on lines +2320 to +2331
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def append_multiline_macro(pp_defs: dict, def_name: str, line: str):
def_value = pp_defs[def_name]
def_args = None
if isinstance(def_value, tuple):
def_args, def_value = def_value
def_value += line
if def_args is not None:
def_value = (def_args, def_value)
pp_defs[def_name] = def_value
def append_multiline_macro(def_value: str | tuple, line: str):
def_args = None
if isinstance(def_value, tuple):
def_args, def_value = def_value
def_value += line
if def_args is not None:
return (def_args, def_value)
return def_value



def check_pp_prefix(prefix: str):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would probably rename to something like

Suggested change
def check_pp_prefix(prefix: str):
def is_pp(prefix: str):

However, I think this might be a bit slow compared to compiled regex expressions.
Also keep in mind that this should not activate with fypp preprocessor macros.

return prefix == "#"
Comment on lines +2306 to +2335
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move this inside the preprocess function, as internal functions to keep things isolated.

My ideal case scenario is to rewrite the preprocessor using pyparsing, but I have not been able to iron out some of the bugs in my local attempt.

14 changes: 10 additions & 4 deletions fortls/regex_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,17 @@ class FortranRegularExpressions:
FREE_FORMAT_TEST: Pattern = compile(r"[ ]{1,4}[a-z]", I)
# Preprocessor matching rules
DEFINED: Pattern = compile(r"defined[ ]*\(?[ ]*([a-z_]\w*)[ ]*\)?", I)
PP_REGEX: Pattern = compile(r"#(if |ifdef|ifndef|else|elif|endif)")
PP_DEF: Pattern = compile(r"#(define|undef)[ ]*([\w]+)(\((\w+(,[ ]*)?)+\))?", I)
PP_REGEX: Pattern = compile(
r"[ ]*(#)[ ]*(if |ifdef|ifndef|else|elif|endif)",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't be capturing (#) into a group

I,
)
PP_DEF: Pattern = compile(
r"[ ]*(#)[ ]*(define|undef|undefined)" r"[ ]+(\w+)(\([ ]*([ \w,]*?)[ ]*\))?",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • You shouldn't be capturing (#) into a group
  • String needs to be merged, "..." r"..." is redundant
  • Should be [ ]* not [ ]+
  • undefined is a welcome feature addition, but including it as part of this PR, hides the addition. We need to keep peripheral additions to a minimum otherwise the complexity of the PR explodes and makes reviewing very slow.

I,
)
PP_DEF_TEST: Pattern = compile(r"(![ ]*)?defined[ ]*\([ ]*(\w*)[ ]*\)$", I)
PP_INCLUDE: Pattern = compile(r"#include[ ]*([\"\w\.]*)", I)
PP_ANY: Pattern = compile(r"(^#:?\w+)")
PP_INCLUDE: Pattern = compile(r"[ ]*#[ ]*include[ ]+([\"\w\.]*)", I)
PP_ANY: Pattern = compile(r"(^[ ]*(?:#)[ ]*\w*:?\w+)")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this REGEX is correct. The group should not be capturing the leading whitespaces.

# Context matching rules
CALL: Pattern = compile(r"[ ]*CALL[ ]+[\w%]*$", I)
INT_STMNT: Pattern = compile(r"^[ ]*[a-z]*$", I)
Expand Down
15 changes: 11 additions & 4 deletions test/test_preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def check_return(result_array, checks):
string += hover_req(file_path, 30, 23)
file_path = root_dir / "preproc_if_elif_skip.F90"
string += hover_req(file_path, 30, 23)
file_path = root_dir / "preproc_spacing_arg_defs.F90"
string += hover_req(file_path, 11, 20)
string += hover_req(file_path, 20, 17)
string += hover_req(file_path, 22, 13)
config = str(root_dir / ".pp_conf.json")
errcode, results = run_request(string, ["--config", config])
assert errcode == 0
Expand All @@ -52,12 +56,12 @@ def check_return(result_array, checks):
"```fortran90\n#define PETSC_ERR_INT_OVERFLOW 84\n```",
"```fortran90\n#define varVar 55\n```",
(
"```fortran90\n#define ewrite if (priority <= 3) write((priority),"
" format)\n```"
"```fortran90\n#define ewrite(priority, format)"
" if (priority <= 3) write((priority), format)\n```"
),
(
"```fortran90\n#define ewrite2 if (priority <= 3) write((priority),"
" format)\n```"
"```fortran90\n#define ewrite2(priority, format)"
" if (priority <= 3) write((priority), format)\n```"
),
"```fortran90\n#define SUCCESS .true.\n```",
"```fortran90\nREAL, CONTIGUOUS, POINTER, DIMENSION(:) :: var1\n```",
Expand All @@ -68,6 +72,9 @@ def check_return(result_array, checks):
"```fortran90\nINTEGER, PARAMETER :: res = 0+1+0+0\n```",
"```fortran90\nINTEGER, PARAMETER :: res = 0+0+0+1\n```",
"```fortran90\nINTEGER, PARAMETER :: res = 1+0+0+0\n```",
"```fortran90\n#define MAYBEWRAP(PROCEDURE) PROCEDURE\n```",
"```fortran90\nSUBROUTINE test_type_set_test()\n```",
"```fortran90\n#define MACROARGS(x, y) x + y\n```",
)
assert len(ref_results) == len(results) - 1
check_return(results[1:], ref_results)
5 changes: 5 additions & 0 deletions test/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ def test_workspace_symbols():
def check_return(result_array):
# Expected objects
objs = (
["argtest", 13, 16],
["test", 6, 7],
["test_abstract", 2, 0],
["test_associate_block", 2, 0],
Expand All @@ -196,7 +197,11 @@ def check_return(result_array):
["test_str1", 13, 5],
["test_str2", 13, 5],
["test_sub", 6, 8],
["test_type", 5, 5],
["test_type_set_test", 6, 25],
["test_vis_mod", 2, 0],
["the_test", 13, 15],
["wrap_test_type_set_test", 6, 33],
Comment on lines +200 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added tests should not be part of this. Global namespace search.
They need to be excluded from the root dir and also renamed.
test_type implies that a series of checks are done relevant to the AST node for Type, which is not the case.

)
assert len(result_array) == len(objs)
for i, obj in enumerate(objs):
Expand Down
2 changes: 1 addition & 1 deletion test/test_source/pp/.pp_conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
"pp_suffixes": [".h", ".F90"],
"incl_suffixes": [".h"],
"include_dirs": ["include"],
"pp_defs": { "HAVE_CONTIGUOUS": "" }
"pp_defs": { "HAVE_CONTIGUOUS": "" },
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert

}
22 changes: 22 additions & 0 deletions test/test_source/pp/include/indent.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
!! sample code adapted from json-fortran/json_macros.inc

# define SPACING_TEST
# define FILE_ENCODING ,encoding='UTF-8'

# ifdef __GFORTRAN__
! gfortran uses cpp in old-school compatibility mode so
! the # stringify and ## concatenate operators don't work
! but we can use C/C++ style comment to ensure PROCEDURE is
! correctly tokenized and prepended with 'wrap_' when the
! macro is expanded
# define MAYBEWRAP(PROCEDURE) PROCEDURE , wrap_/**/PROCEDURE
# else
! Intel's fpp does support the more contemporary ## concatenation
! operator, but doesn't treat the C/C++ comments the same way.
! If you use the gfortran approach and pass the -noB switch to
! fpp, the macro will expand, but with a space between wrap_ and
! whatever PROCEDURE expands to
# define MAYBEWRAP(PROCEDURE) PROCEDURE
# endif

# define MACROARGS( x , y ) x + y
Loading