Skip to content

Commit 17ceee2

Browse files
committed
Fix location metadata on backslash lint checks
The existing column offset logic does not account for the offset between the start of the line and the start of the string, nor does the line logic reflect the actual line number of the error in multi-line strings.
1 parent f3fd5ab commit 17ceee2

File tree

4 files changed

+35
-13
lines changed

4 files changed

+35
-13
lines changed

CONTRIBUTORS.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,3 +435,5 @@ contributors:
435435
* Batuhan Taskaya: contributor
436436

437437
* Frank Harrison (doublethefish): contributor
438+
439+
* Matthew Suozzo

ChangeLog

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Pylint's ChangeLog
2727

2828
Closes #3992
2929

30+
* Fix column metadata for anomalous backslash lints
3031

3132
What's New in Pylint 2.6.1?
3233
===========================

pylint/checkers/strings.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -705,7 +705,7 @@ def process_tokens(self, tokens):
705705
elif tok_type == tokenize.STRING:
706706
# 'token' is the whole un-parsed token; we can look at the start
707707
# of it to see whether it's a raw or unicode string etc.
708-
self.process_string_token(token, start[0])
708+
self.process_string_token(token, start[0], start[1])
709709
# We figure the next token, ignoring comments & newlines:
710710
j = i + 1
711711
while j < len(tokens) and tokens[j].type in (
@@ -799,7 +799,7 @@ def check_for_concatenated_strings(self, elements, iterable_type):
799799
"implicit-str-concat", line=elt.lineno, args=(iterable_type,)
800800
)
801801

802-
def process_string_token(self, token, start_row):
802+
def process_string_token(self, token, start_row, start_col):
803803
quote_char = None
804804
index = None
805805
for index, char in enumerate(token):
@@ -811,21 +811,30 @@ def process_string_token(self, token, start_row):
811811

812812
prefix = token[:index].lower() # markers like u, b, r.
813813
after_prefix = token[index:]
814-
if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char:
815-
string_body = after_prefix[3:-3]
816-
else:
817-
string_body = after_prefix[1:-1] # Chop off quotes
814+
# Chop off quotes
815+
quote_length = (
816+
3 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char else 1
817+
)
818+
string_body = after_prefix[quote_length:-quote_length]
818819
# No special checks on raw strings at the moment.
819820
if "r" not in prefix:
820-
self.process_non_raw_string_token(prefix, string_body, start_row)
821+
self.process_non_raw_string_token(
822+
prefix,
823+
string_body,
824+
start_row,
825+
start_col + len(prefix) + quote_length,
826+
)
821827

822-
def process_non_raw_string_token(self, prefix, string_body, start_row):
828+
def process_non_raw_string_token(
829+
self, prefix, string_body, start_row, string_start_col
830+
):
823831
"""check for bad escapes in a non-raw string.
824832
825833
prefix: lowercase string of eg 'ur' string prefix markers.
826834
string_body: the un-parsed body of the string, not including the quote
827835
marks.
828836
start_row: integer line number in the source.
837+
string_start_col: integer col number of the string start in the source.
829838
"""
830839
# Walk through the string; if we see a backslash then escape the next
831840
# character, and skip over it. If we see a non-escaped character,
@@ -844,6 +853,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
844853
# of the string would be a SyntaxError.
845854
next_char = string_body[index + 1]
846855
match = string_body[index : index + 2]
856+
# The column offset will vary depending on whether the string token
857+
# is broken across lines. Calculate relative to the nearest line
858+
# break or relative to the start of the token's line.
859+
last_newline = string_body.rfind("\n", 0, index)
860+
if last_newline == -1:
861+
line = start_row
862+
col_offset = index + string_start_col
863+
else:
864+
line = start_row + string_body.count("\n", 0, index)
865+
col_offset = index - last_newline - 1
847866
if next_char in self.UNICODE_ESCAPE_CHARACTERS:
848867
if "u" in prefix:
849868
pass
@@ -852,16 +871,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
852871
else:
853872
self.add_message(
854873
"anomalous-unicode-escape-in-string",
855-
line=start_row,
874+
line=line,
856875
args=(match,),
857-
col_offset=index,
876+
col_offset=col_offset,
858877
)
859878
elif next_char not in self.ESCAPE_CHARACTERS:
860879
self.add_message(
861880
"anomalous-backslash-in-string",
862-
line=start_row,
881+
line=line,
863882
args=(match,),
864-
col_offset=index,
883+
col_offset=col_offset,
865884
)
866885
# Whether it was a valid escape or not, backslash followed by
867886
# another character can always be consumed whole: the second

tests/functional/e/excess_escapes.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ anomalous-backslash-in-string:16::"Anomalous backslash in string: '\o'. String c
66
anomalous-backslash-in-string:16::"Anomalous backslash in string: '\o'. String constant might be missing an r prefix."
77
anomalous-backslash-in-string:18::"Anomalous backslash in string: '\8'. String constant might be missing an r prefix."
88
anomalous-backslash-in-string:18::"Anomalous backslash in string: '\9'. String constant might be missing an r prefix."
9-
anomalous-backslash-in-string:28::"Anomalous backslash in string: '\P'. String constant might be missing an r prefix."
9+
anomalous-backslash-in-string:30::"Anomalous backslash in string: '\P'. String constant might be missing an r prefix."

0 commit comments

Comments
 (0)