Skip to content

Fix location metadata on backslash/unicode escape lints #4003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -435,3 +435,5 @@ contributors:
* Batuhan Taskaya: contributor

* Frank Harrison (doublethefish): contributor

* Matthew Suozzo
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Pylint's ChangeLog

Closes #3992

* Fix line and column metadata for anomalous backslash and unicode escape lints

What's New in Pylint 2.6.1?
===========================
Expand Down
43 changes: 31 additions & 12 deletions pylint/checkers/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ def process_tokens(self, tokens):
elif tok_type == tokenize.STRING:
# 'token' is the whole un-parsed token; we can look at the start
# of it to see whether it's a raw or unicode string etc.
self.process_string_token(token, start[0])
self.process_string_token(token, start[0], start[1])
# We figure the next token, ignoring comments & newlines:
j = i + 1
while j < len(tokens) and tokens[j].type in (
Expand Down Expand Up @@ -799,7 +799,7 @@ def check_for_concatenated_strings(self, elements, iterable_type):
"implicit-str-concat", line=elt.lineno, args=(iterable_type,)
)

def process_string_token(self, token, start_row):
def process_string_token(self, token, start_row, start_col):
quote_char = None
index = None
for index, char in enumerate(token):
Expand All @@ -811,21 +811,30 @@ def process_string_token(self, token, start_row):

prefix = token[:index].lower() # markers like u, b, r.
after_prefix = token[index:]
if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char:
string_body = after_prefix[3:-3]
else:
string_body = after_prefix[1:-1] # Chop off quotes
# Chop off quotes
quote_length = (
3 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char else 1
)
string_body = after_prefix[quote_length:-quote_length]
# No special checks on raw strings at the moment.
if "r" not in prefix:
self.process_non_raw_string_token(prefix, string_body, start_row)
self.process_non_raw_string_token(
prefix,
string_body,
start_row,
start_col + len(prefix) + quote_length,
)

def process_non_raw_string_token(self, prefix, string_body, start_row):
def process_non_raw_string_token(
self, prefix, string_body, start_row, string_start_col
):
"""check for bad escapes in a non-raw string.

prefix: lowercase string of eg 'ur' string prefix markers.
string_body: the un-parsed body of the string, not including the quote
marks.
start_row: integer line number in the source.
string_start_col: integer col number of the string start in the source.
"""
# Walk through the string; if we see a backslash then escape the next
# character, and skip over it. If we see a non-escaped character,
Expand All @@ -844,6 +853,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
# of the string would be a SyntaxError.
next_char = string_body[index + 1]
match = string_body[index : index + 2]
# The column offset will vary depending on whether the string token
# is broken across lines. Calculate relative to the nearest line
# break or relative to the start of the token's line.
last_newline = string_body.rfind("\n", 0, index)
if last_newline == -1:
line = start_row
col_offset = index + string_start_col
else:
line = start_row + string_body.count("\n", 0, index)
col_offset = index - last_newline - 1
if next_char in self.UNICODE_ESCAPE_CHARACTERS:
if "u" in prefix:
pass
Expand All @@ -852,16 +871,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
else:
self.add_message(
"anomalous-unicode-escape-in-string",
line=start_row,
line=line,
args=(match,),
col_offset=index,
col_offset=col_offset,
)
elif next_char not in self.ESCAPE_CHARACTERS:
self.add_message(
"anomalous-backslash-in-string",
line=start_row,
line=line,
args=(match,),
col_offset=index,
col_offset=col_offset,
)
# Whether it was a valid escape or not, backslash followed by
# another character can always be consumed whole: the second
Expand Down
21 changes: 21 additions & 0 deletions tests/functional/a/anomalous_backslash_escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# pylint:disable=W0105, W0511
"""Test for anomalous backslash escapes in strings"""

BAD_ESCAPE = '\z' # [anomalous-backslash-in-string]
BAD_ESCAPE_NOT_FIRST = 'abc\z' # [anomalous-backslash-in-string]
BAD_ESCAPE_WITH_PREFIX = b'abc\z' # [anomalous-backslash-in-string]
BAD_ESCAPE_WITH_BACKSLASH = b'a\
\z' # [anomalous-backslash-in-string]
# +3:[anomalous-backslash-in-string]
BAD_ESCAPE_BLOCK = b'''
abc
\z
'''
BAD_ESCAPE_PARENS = (b'abc'
b'\z') # [anomalous-backslash-in-string]
# '\b' is a recognised escape sequence (backspace), so no message is expected.
GOOD_ESCAPE = '\b'

# Raw strings are skipped by the checker, so '\z' is not reported here.
BAD_ESCAPE_BUT_RAW = r'\z'

# Only string tokens are checked; a backslash in a comment is never reported: \z
#
# NOTE: the expected-output file lists each message by line number, so
# inserting or removing lines above GOOD_ESCAPE means updating it as well.
6 changes: 6 additions & 0 deletions tests/functional/a/anomalous_backslash_escape.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
anomalous-backslash-in-string:4::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
anomalous-backslash-in-string:5::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
anomalous-backslash-in-string:6::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
anomalous-backslash-in-string:8::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
anomalous-backslash-in-string:12::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
anomalous-backslash-in-string:15::"Anomalous backslash in string: '\z'. String constant might be missing an r prefix."
3 changes: 2 additions & 1 deletion tests/functional/e/excess_escapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
ESCAPE_UNICODE = "\\\\n"

# Bad docstring
"""Even in a docstring # [anomalous-backslash-in-string]
# +3:[anomalous-backslash-in-string]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

"""Even in a docstring

You shouldn't have ambiguous text like: C:\Program Files\alpha
"""
2 changes: 1 addition & 1 deletion tests/functional/e/excess_escapes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ anomalous-backslash-in-string:16::"Anomalous backslash in string: '\o'. String c
anomalous-backslash-in-string:16::"Anomalous backslash in string: '\o'. String constant might be missing an r prefix."
anomalous-backslash-in-string:18::"Anomalous backslash in string: '\8'. String constant might be missing an r prefix."
anomalous-backslash-in-string:18::"Anomalous backslash in string: '\9'. String constant might be missing an r prefix."
anomalous-backslash-in-string:28::"Anomalous backslash in string: '\P'. String constant might be missing an r prefix."
anomalous-backslash-in-string:31::"Anomalous backslash in string: '\P'. String constant might be missing an r prefix."