@@ -705,7 +705,7 @@ def process_tokens(self, tokens):
            elif tok_type == tokenize.STRING:
                # 'token' is the whole un-parsed token; we can look at the start
                # of it to see whether it's a raw or unicode string etc.
-               self.process_string_token(token, start[0])
+               self.process_string_token(token, start[0], start[1])
                # We figure the next token, ignoring comments & newlines:
                j = i + 1
                while j < len(tokens) and tokens[j].type in (
@@ -799,7 +799,7 @@ def check_for_concatenated_strings(self, elements, iterable_type):
                        "implicit-str-concat", line=elt.lineno, args=(iterable_type,)
                    )

-   def process_string_token(self, token, start_row):
+   def process_string_token(self, token, start_row, start_col):
        quote_char = None
        index = None
        for index, char in enumerate(token):
@@ -811,21 +811,30 @@ def process_string_token(self, token, start_row):

        prefix = token[:index].lower()  # markers like u, b, r.
        after_prefix = token[index:]
-       if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char:
-           string_body = after_prefix[3:-3]
-       else:
-           string_body = after_prefix[1:-1]  # Chop off quotes
+       # Chop off quotes
+       quote_length = (
+           3 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char else 1
+       )
+       string_body = after_prefix[quote_length:-quote_length]
        # No special checks on raw strings at the moment.
        if "r" not in prefix:
-           self.process_non_raw_string_token(prefix, string_body, start_row)
+           self.process_non_raw_string_token(
+               prefix,
+               string_body,
+               start_row,
+               start_col + len(prefix) + quote_length,
+           )

-   def process_non_raw_string_token(self, prefix, string_body, start_row):
+   def process_non_raw_string_token(
+       self, prefix, string_body, start_row, string_start_col
+   ):
        """check for bad escapes in a non-raw string.

        prefix: lowercase string of eg 'ur' string prefix markers.
        string_body: the un-parsed body of the string, not including the quote
        marks.
        start_row: integer line number in the source.
+       string_start_col: integer col number of the string start in the source.
        """
        # Walk through the string; if we see a backslash then escape the next
        # character, and skip over it. If we see a non-escaped character,
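A quick illustration of the start-column arithmetic introduced in this hunk (the token value and columns are made-up, not taken from the patch): the first character of the string body sits after the prefix markers and the opening quotes, so its source column is the token's start column plus the prefix length plus the quote length.

    # Hypothetical token: a bytes triple-quoted string starting at column 4.
    token = 'b"""ab\\qcd"""'
    start_col = 4                      # column where the token begins
    prefix = "b"                       # lowercase prefix markers
    quote_length = 3                   # triple-quoted, so three quote characters
    string_start_col = start_col + len(prefix) + quote_length
    print(string_start_col)            # 8 -> column of 'a', the first body character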
@@ -844,6 +853,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
            # of the string would be a SyntaxError.
            next_char = string_body[index + 1]
            match = string_body[index : index + 2]
+           # The column offset will vary depending on whether the string token
+           # is broken across lines. Calculate relative to the nearest line
+           # break or relative to the start of the token's line.
+           last_newline = string_body.rfind("\n", 0, index)
+           if last_newline == -1:
+               line = start_row
+               col_offset = index + string_start_col
+           else:
+               line = start_row + string_body.count("\n", 0, index)
+               col_offset = index - last_newline - 1
            if next_char in self.UNICODE_ESCAPE_CHARACTERS:
                if "u" in prefix:
                    pass
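A minimal sketch of how the new bookkeeping behaves when the string body spans a line break (the body, row, and column below are assumed values, not from the patch): escapes before the first newline are reported relative to the token's start column, while escapes after it are reported relative to the start of their own physical line.

    string_body = "first\\q\nsecond\\w"   # body with escapes on two physical lines
    start_row = 10                        # assumed token start line
    string_start_col = 8                  # assumed column of the body's first char

    for index, char in enumerate(string_body):
        if char != "\\":
            continue
        last_newline = string_body.rfind("\n", 0, index)
        if last_newline == -1:
            line = start_row
            col_offset = index + string_start_col
        else:
            line = start_row + string_body.count("\n", 0, index)
            col_offset = index - last_newline - 1
        print(line, col_offset)           # 10 13, then 11 6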
@@ -852,16 +871,16 @@ def process_non_raw_string_token(self, prefix, string_body, start_row):
                else:
                    self.add_message(
                        "anomalous-unicode-escape-in-string",
-                       line=start_row,
+                       line=line,
                        args=(match,),
-                       col_offset=index,
+                       col_offset=col_offset,
                    )
            elif next_char not in self.ESCAPE_CHARACTERS:
                self.add_message(
                    "anomalous-backslash-in-string",
-                   line=start_row,
+                   line=line,
                    args=(match,),
-                   col_offset=index,
+                   col_offset=col_offset,
                )
            # Whether it was a valid escape or not, backslash followed by
            # another character can always be consumed whole: the second
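For a self-contained check of the end result, here is an assumed, standalone sketch that reproduces the same offset arithmetic outside pylint using only the standard tokenize module; the source string, variable names, and printed values are illustrative only, but they mirror the calculation the patch performs before add_message is called.

    import io
    import tokenize

    # A contrived source line with a bad escape (\q) inside a prefixed
    # triple-quoted string; the backslash sits at line 1, column 10.
    source = 'x = b"""ab\\qcd"""\n'

    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type != tokenize.STRING:
            continue
        token, (start_row, start_col) = tok.string, tok.start
        # Mirror process_string_token: split off prefix markers and quotes.
        index = next(i for i, ch in enumerate(token) if ch in "'\"")
        prefix = token[:index].lower()
        after_prefix = token[index:]
        quote_char = after_prefix[0]
        quote_length = 3 if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char else 1
        string_body = after_prefix[quote_length:-quote_length]
        # Locate the escape and apply the same offset arithmetic as the patch.
        escape_index = string_body.find("\\")
        string_start_col = start_col + len(prefix) + quote_length
        print(start_row, escape_index + string_start_col)   # 1 10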