@@ -772,7 +772,8 @@ translate_into_utf8(const char* str, const char* enc) {
772
772
773
773
774
774
static char *
775
- translate_newlines (const char * s , int exec_input , struct tok_state * tok ) {
775
+ translate_newlines (const char * s , int exec_input , int preserve_crlf ,
776
+ struct tok_state * tok ) {
776
777
int skip_next_lf = 0 ;
777
778
size_t needed_length = strlen (s ) + 2 , final_length ;
778
779
char * buf , * current ;
@@ -792,7 +793,7 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
792
793
break ;
793
794
}
794
795
}
795
- if (c == '\r' ) {
796
+ if (! preserve_crlf && c == '\r' ) {
796
797
skip_next_lf = 1 ;
797
798
c = '\n' ;
798
799
}
@@ -822,14 +823,14 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
822
823
inside TOK. */
823
824
824
825
static char *
825
- decode_str (const char * input , int single , struct tok_state * tok )
826
+ decode_str (const char * input , int single , struct tok_state * tok , int preserve_crlf )
826
827
{
827
828
PyObject * utf8 = NULL ;
828
829
char * str ;
829
830
const char * s ;
830
831
const char * newl [2 ] = {NULL , NULL };
831
832
int lineno = 0 ;
832
- tok -> input = str = translate_newlines (input , single , tok );
833
+ tok -> input = str = translate_newlines (input , single , preserve_crlf , tok );
833
834
if (str == NULL )
834
835
return NULL ;
835
836
tok -> enc = NULL ;
@@ -881,14 +882,14 @@ decode_str(const char *input, int single, struct tok_state *tok)
881
882
/* Set up tokenizer for string */
882
883
883
884
struct tok_state *
884
- _PyTokenizer_FromString (const char * str , int exec_input )
885
+ _PyTokenizer_FromString (const char * str , int exec_input , int preserve_crlf )
885
886
{
886
887
struct tok_state * tok = tok_new ();
887
888
char * decoded ;
888
889
889
890
if (tok == NULL )
890
891
return NULL ;
891
- decoded = decode_str (str , exec_input , tok );
892
+ decoded = decode_str (str , exec_input , tok , preserve_crlf );
892
893
if (decoded == NULL ) {
893
894
_PyTokenizer_Free (tok );
894
895
return NULL ;
@@ -902,13 +903,13 @@ _PyTokenizer_FromString(const char *str, int exec_input)
902
903
/* Set up tokenizer for UTF-8 string */
903
904
904
905
struct tok_state *
905
- _PyTokenizer_FromUTF8 (const char * str , int exec_input )
906
+ _PyTokenizer_FromUTF8 (const char * str , int exec_input , int preserve_crlf )
906
907
{
907
908
struct tok_state * tok = tok_new ();
908
909
char * translated ;
909
910
if (tok == NULL )
910
911
return NULL ;
911
- tok -> input = translated = translate_newlines (str , exec_input , tok );
912
+ tok -> input = translated = translate_newlines (str , exec_input , preserve_crlf , tok );
912
913
if (translated == NULL ) {
913
914
_PyTokenizer_Free (tok );
914
915
return NULL ;
@@ -1050,7 +1051,7 @@ tok_underflow_interactive(struct tok_state *tok) {
1050
1051
}
1051
1052
char * newtok = PyOS_Readline (tok -> fp ? tok -> fp : stdin , stdout , tok -> prompt );
1052
1053
if (newtok != NULL ) {
1053
- char * translated = translate_newlines (newtok , 0 , tok );
1054
+ char * translated = translate_newlines (newtok , 0 , 0 , tok );
1054
1055
PyMem_Free (newtok );
1055
1056
if (translated == NULL ) {
1056
1057
return 0 ;
@@ -1594,6 +1595,9 @@ tok_decimal_tail(struct tok_state *tok)
1594
1595
static inline int
1595
1596
tok_continuation_line (struct tok_state * tok ) {
1596
1597
int c = tok_nextc (tok );
1598
+ if (c == '\r' ) {
1599
+ c = tok_nextc (tok );
1600
+ }
1597
1601
if (c != '\n' ) {
1598
1602
tok -> done = E_LINECONT ;
1599
1603
return -1 ;
@@ -1693,7 +1697,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
1693
1697
}
1694
1698
}
1695
1699
tok_backup (tok , c );
1696
- if (c == '#' || c == '\n' ) {
1700
+ if (c == '#' || c == '\n' || c == '\r' ) {
1697
1701
/* Lines with only whitespace and/or comments
1698
1702
shouldn't affect the indentation and are
1699
1703
not passed to the parser as NEWLINE tokens,
@@ -1822,7 +1826,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
1822
1826
const char * prefix , * type_start ;
1823
1827
int current_starting_col_offset ;
1824
1828
1825
- while (c != EOF && c != '\n' ) {
1829
+ while (c != EOF && c != '\n' && c != '\r' ) {
1826
1830
c = tok_nextc (tok );
1827
1831
}
1828
1832
@@ -2002,6 +2006,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2002
2006
return MAKE_TOKEN (NAME );
2003
2007
}
2004
2008
2009
+ if (c == '\r' ) {
2010
+ c = tok_nextc (tok );
2011
+ }
2012
+
2005
2013
/* Newline */
2006
2014
if (c == '\n' ) {
2007
2015
tok -> atbol = 1 ;
@@ -2405,7 +2413,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
2405
2413
else {
2406
2414
end_quote_size = 0 ;
2407
2415
if (c == '\\' ) {
2408
- tok_nextc (tok ); /* skip escaped char */
2416
+ c = tok_nextc (tok ); /* skip escaped char */
2417
+ if (c == '\r' ) {
2418
+ c = tok_nextc (tok );
2419
+ }
2409
2420
}
2410
2421
}
2411
2422
}
@@ -2696,6 +2707,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
2696
2707
return MAKE_TOKEN (FSTRING_MIDDLE );
2697
2708
} else if (c == '\\' ) {
2698
2709
int peek = tok_nextc (tok );
2710
+ if (peek == '\r' ) {
2711
+ peek = tok_nextc (tok );
2712
+ }
2699
2713
// Special case when the backslash is right before a curly
2700
2714
// brace. We have to restore and return the control back
2701
2715
// to the loop for the next iteration.
0 commit comments