From 20da58269741459dbee6cd3bee77c88e5408b34b Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 26 Jul 2023 17:07:10 -0700
Subject: [PATCH 01/14] update traceback.c

---
 Lib/lib2to3/Grammar3.11.4.final.0.pickle      | Bin 0 -> 15313 bytes
 .../PatternGrammar3.11.4.final.0.pickle       | Bin 0 -> 1225 bytes
 Python/traceback.c                            |  62 +++++++++++++-----
 3 files changed, 46 insertions(+), 16 deletions(-)
 create mode 100644 Lib/lib2to3/Grammar3.11.4.final.0.pickle
 create mode 100644 Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle
diff --git a/Lib/lib2to3/Grammar3.11.4.final.0.pickle b/Lib/lib2to3/Grammar3.11.4.final.0.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..c3acebc758875b5438cfebaab64116d80c0d1c9b
GIT binary patch
literal 15313
zcmc&*X>eS}amL^xKoGp|`{wcjK=8irbNG1Q7qkQxoE-uS2)iIf%aSG8y6m+qTPt6Z
zY|FAK*%l?+KV110SEZ6v@-L}M<yR`HN~MzgNL5n#5m%~GseE70%-gpB2vUa~g0J73
zo}QlWo}Qk0v#+14d-Kn?SINI;N~vOV{$B6RzMl5nVDHV$K#8ca-PxW@M>f|#SSUqR
zs@Raub#-K(>K`aY%9je6JnE`du`!*?rStjhtz0RpQN=Oofm=OU*42`7;MQPoCWlg;
zDvnJLWDCagsaM4&G|LPWI`V~H5R6g9=5+pEuCt?iFxT0Y=`KYLY$GT~(x{5H=|W#`
zDH_Xu2XBRbn?QJTAd|lBd5uHYojs@(Hq9*c_4apk_d$X2kWorm^e_Q@T%iEfqluxe
zufG&c0#nvO>U<xRn+yS2?i++LoIq0ql`CX(gBg#S3XWZw&c1<kA=6cgrj?8Ad^&jg
z+P;B&DVhP%x-vb!?Mzi1*Ol!o<THid^lb_>OBKg=^%cOXBc1QarF%1_XtpX&sKkNM
z9GKH19o1ZjmdSPYb!Bt6I?!cLDVm4QGk4NGVPNNz<f+a~f1v}$8q9<D0*IS=D%0r&
zS_p%?VP7N_{R6&YG2~{28|fvgSl^xQEKtLxAnfkxyXy&;fe>c*rRA`1Ji03&@~xh}
zn-IuZGlkl0w=b-O=2;jU{t;`w3Yu5qp!sUB@yPB0`o@|v!kJ=i8R0r=K}S95-kahb
ztq_=Mc#F58UpDKadL7Qo*FlL~U%{8!!LF~Tt0U&H9@RuR;tgO9g<WAIOb!PEKlm4f
z-Gt8i;g?>?&8k?}-*=ZIy@iwm*<6?oTTvcJXY;|YwxQR7Okr>!=i6=vLEdD$3w}cF
z06`vxz{GW0?u0kxG5Y11ybGe`d$OGwy7z9j?@jjv>+gYR`9gZYv)hZIcYOA-V(@0Z
zb0FJ~dAc876Bph1AAlP9!E6BzbP&y;Du&^ZJTVX)hJ9!bM|=eALTOzoItq57*l`O+
zFGa_asC>E`Y_%#*j;Rt-9*5@@y8C4yBWQ<b*UJfrRB*$8l6aW1hh{!SSqAQT6P;#H
z_xdwkh*K~Og*^i^-oc2)h_mS8j_=|eSmD`Go<|pVTo)H$%e#g#)0yldbiB)02b;hY
zmqO9iUWN=(i+)Pc6_lQW!?<~N6{PpF8F&*UC`H%6*cDy%^|<U<-B5$YW*4~IT^MKl
zM#wfbh}5PQ0ov3dESq|SV>1R(*fd1-$}}R}nz0D5rYUMvW*h>lX^xtd86S;PW<u1g
z%tS;=GYJvWOh#NZQxFf$R0KgYEt;at^k}LwGZ5a)OawJE3z5vsj%F$|2QkacMUXP{
z5R}aPXs$8~5PHnQXudLw5Mj*XXrVGoqD9IqjTS4jELx(>as&>uB3h<QDq610%4mf$
ztD=-LtD}|5tcg}Bvldg`v|xIh)@ZFVZBdId>!Ma=+M_mQ)<^4<*$}lWvoTt)%%*6A
zGMl4~%51@;Gh3t0%51|VGuxxB%It`?DYG-$uFS4zhcdgPoyzQqb}6$L)57eF_9(MI
z+N;cgXrD3%;qvBCbU>NIaBp)2o^6i8sm-zIh%y=;Y>vZcO$0ABC!*uZoQxu6PDLk_
zIUSu;<_x^foQ+N^a}Mrh&PQjJxe%RG=3;bSnM=_HWiCe-mAL{(F;}C@%3On2nCsD1
zWp2O)Oh<HGnJ1zf%A^O2^>BYoh0^s>%Kg_$tCvc;T2}$q5N58IbQO_lJb_BDk^-Lf
zYN3+KjIUf1SFRmi+2Sj8tF8t=vS`ya=%|WicGRkCr2-f}c&6)OrtKtj;;>Fjjc@LV
zQ@W1Sdc8pD$T3Dg(&GlHXcM@xviB+$L#LZaAuZ}flQF~f8Rtn7`m|Sa99f@bq1|S0
z2d6>kIaH_g7ExEI^wwY_F_x!I=~_{wK?ui6=S?)7UZU#&SLk}cl)#mGj2Jnkw@G=s
zZb01*LRzRnH+p?{O7$*1)>Z7*O}=7}RP5E`TtyX&Vj*D-C0qxNKPte}QD|XFOjE!J
zuULzHqQ!oJ2Lv7zcu3%3fky-$6?jab7I<7B<_okrK}c;XJhquw_FjP<AFwO@DA1Vn
z#ugfo-ULNx#FLbnOixik@(~I?2!Z3wDmG%fDI(0WFk+|0uRtz%lAj5FewIA|lhWtd
z3nWC@DL~UXhI)Ds^?WSqxk^V%x^7Bul>RReI$qR22zOB`FA2OX&|$8~^HqV@1fCIi
zU3_ac`zte}#IvP3p1VRFB)-8q(W_(Fyh-UNq-9#*O@UhkZWh=nuuEV@;8wzN=S~wn
zfmmn!vVOYTV^jK;2%Z)wbI5UE=<ysH0|yh^)ww|R!(70{C5)g@uyf}Eo?$lbT%Mf6
zh1fVL^mf|!hU&5~DIc<|$lDsseZzZ7O1FzMZuD;A0%pLr&@(aY&V}4av5qr5V1rXz
zPIQDU!XfSkbwb3VI0yhJWA=m2MTyW~6<gDsHy8|Ph8ErI2i)~q6{19|P<Z>8*gm&K
zO}BQS`HC<_l8&r<V)FK3UW6vW#Tp)7=|&)ocW>N8N%z$(Q;p<6=v+)C%ZFn@1z=Fd
zhuZ>UzYi7zU1H6e?3utq9AmaVGjhAKP2C8=<ed5<RU!uT#ZvVTSNF-dvP~2lkgoC+
zC?Won;z2vgAqLurWW7SZgHZHzOq_GLkxM}j*h*VMZwu&^V4kQ4mhKr^)=SGV0+$Lb
zNIf(h^u}>5;t+L5sO}1UO5i<#Umyfq*KHWpII70;N-Rwa_WIca_jGX<G29t9rJt5L
zBa0YWw$R1d=9FIu(Fs>Xr-h!#u2~&c3;ji!OU(C-*jc(Dr+EAl>xJ)KmdIGmX`<Au
zpj6;YhoG&KRxB?8&jypV`2Ha<{f-{?C29Ax46}HnQ_Q<lEKX92#58)WDft|`kj{qg
z#iXWE=&3QW+n|`(!(4fW7%pJ76oq%Ks8A>I?0G^e@_bl+!^XpJH2et7@SW)wf`yW^
zI;FoX#`y{%xk=~=qdPMew1tiJi(b$;9Kt=^G)BAfEKliYh3U{)5%!g?D^hVHT7hxg
zaFNc16*rYY6xNE?SfC9eP#h>_VStSw2i?!<9@gTQNK56`l%=KE-a-%6^1SGTb`R0=
zWzq5#!eOR+HJ0?N!{btIT)9ou_*$^58yfT|_#ogMPhK%NDZQL%T@1>}gSg3hveRD|
z*0Eg&gEGmH(sN@;UK4V$qw9w%g&<xI#vvBAixY;~lmaFDj2F&U@04uF)i!K`Nys(X
zt(<-U=EXvcys}KK^pMsNv_eE-C-kJacQ+F}_xUmRC%r}D@k%(sH)5*SWxQS+K5E~L
zNxmhFs{N>?u<N1nQtuoA#n=aBHPvrOlM-1t5<Jjfr8W$7coU?RobSivWl>Z5O>)uS
z7Wf^3-xc^hf!`PSmcX|KzC&2<)!U_xSc(vqV%RU)%Nx>mh{UdK3NwNtg!jml{()FP
z+y!i0cY!K)!Sd90<j#A%I6w5_a4?g)r?k{K;o`;985QPpiv5V)yXj9Apj3@__mqBD
z*hx8rk-@mHAImV;34BlBPXsm!beaU6e=0h^68dzJl7VWvC?Bi&N|<S~;!qeDw!EU{
z7&cT<si{!Zw{+2vXAhMU{l3EN`Pe>{BI@(r&gB^gyHhV$1M$kiI^}jnP6eT<WL8mM
zp>=qDzaLEf{;=V#h>ssUMBJ2?nhM^Dh25=b!CN6`>_f@vE3q1{?DR=FNf1@GNx(VD
zNQ5$&LT`@r&-^~hS%Mg$<ik#(VQUl?c#+FX%c<D3KNsG=2)tVYOL?0ir&A;B$=oiD
z+c&<RBv?Qa|5DQ8Pb|>hy`7{yoqH*9l9dmNsPG3AN)&bx_P%UDVw2tXqR|}~)!Tfm
z4~^D~)$UUOXOIM|5UkmjB8MGkr2y~AdzOd+3iJUE7^xt?ld6p`S!F$|$PacR=kZd3
zcQxO$TYLOTO;||S<pdBqaX-)#r4gF?Mw3v!uO|x@bRJ9AZF-7e(L%66;i-Z}y99?S
zazGj4hm)grP1bFCnvkQBs|H=XdPtP^l6^R2G#*rr#)HQijR&83G(?O))M$J-;%Geh
z1f%gle}1Fkg296jmp&?o48QDD#H9}(8keL7CPb^SCWZHQV7V^Sao{TpL!21x{pORs
z*_79xA#XM}%G#6zpk5Rt_?XiAX~o4~UbN`2;YACQGYWdiZ-;gxZZGsNeRWFzYJ{a$
z-jKBH*W8rw=1uNU^x~MsuO-gJVX1;@N07tYHB+qZ>Q3jwZ#do0AWmRdc|NZ3O0Mxj
zj%cqm4#5!b6^>?=9)}C3r5b0WJYM5lug0;J#%Tr*FEzx~cx&a`DBjkT#v6sk`zwzE
zH8$_uWovA5HMXLR{2W!<l^WngTIr>@mf^yVUjE<<+!*47TIrQq4Zg&00!(iZH7RBg
zF|Z*EHcvFg?^F5Fno`P9n&X$Pd(UvVRUUHSC$1L*Gn#BYu94)S2_C^1Ka{=pIkCde
zZEpuwBS|xgdias<J*bBvrh53b?qifN7%bsuyWf;c`2Fs;p%Z#Y()2@2j$irynWXzb
zwjb94TnBL-!gUze5nM-c9m9p=dVL%hj_P?(&%ws;gTF(+a5+kW9}52mB{&~T<CUGD
zs;bDHt_MlkVuG>@1xp_!*+gYmCCKQRo^+D3ZNBCru$ORM#&rc3PPz3pTsX7V*KuJ0
z$$YZ1`+W<}luu~jDas!8tvRPShd6&IJf%v`2QT4NWiLnzPBtntP1&ms0S-h=SGKDH
zF+<tG3dBrhpLPgLR5qEV><fY@PxsH@yR()3$g#)Bll>fJ|EK~nSJ^+VK+IG2Pb(1f
zmHkUWIEh9w>toEK1<L-lW1Dl5EL8R%90K8qqAgPPKOWJ~DDz{nvj3H6U56q2l#(t{
zwz1lI?nnVQIh_~~m*W~Tt(Gb~HNjydIY!ZBnX>bJYX)-$@(`2}i0SCdm0cke^iG;?
zg|a&x0xnBLO4*a12LnEWJlm~Q_L3v>vu~BMw;jT}$7=Xz1!9e|&pHGa4{~0sY{?<8
zf)LT7>|26J$@&5;yJ}VTT}KArAX%HT4?NjuR(%XlS_h9!a2m<FkHMu~*}r%Mmq)A%
z>y`bh3Kcgf`*)9E`51z2RQ4Yq!Q~NxZBq7MonWvuZNFLB|8xkf@<eP=_IHUshOQDL
zxb#+K>uNYDxJe+3ZOSfi2skGZ+m&s12y7pS*rDt;K|DNej4}&$D!Vt)`eE}qY<W0K
z!d542a~`$DNp5RKP<fZK$48NXi$sV8K?*J!zweC~W6h9F*KTFcJJYu~)9+FC`Y56g
z75n#KyuHd=ClW@E-mp*ECr1(K|I?a@J?MUA?<S;7#uk`19`DDP#s`#r`Sam)P}$cL
zoc>TVVTg|%QudvM6iLejcN%3jA6EALN3?znn;ud2H=mu$QDuMo*|{83_RkVr9yThE
zp_NwlZxftGvg~7UIj-zKCAfSBc_L;1J;4d_h70-$W&e9P_N21awX!Vu4a6yB#|gp(
zeZR7h!D(!394WTaj0k6x-IXAv#N6rn{?01<>ceZ#Df>5xTHofpvVT7udqLTM5iD7=
z<BQ7vn?v9QmxxQs{%@k)ryPOH$~M%AE%44vwpXwXbO;<W5OG!6IfD39E8;a}7bRLh
zY=pV`J%+zrS9YZn4$B~gyP@p5Bm$oo_M~rSdgSv$T~(|*7@azIxx^#D69BaU2<m(s
zLoktGJi!El83gSFjRdm@@b#?w%%4o~#jJd$@1NxpefNwH^xbFlu2{n_dZhuLdfXGf
zM9*}3B){aB>RS2c+x6o<CU^MitZ!fMz72O+D2K1U)2@I|x^q2!ooUa$248r4p8P`F
zJ=V!5+5TBR$9B*4?o(`6sKE!to<u$ZcF(o^#Mh(c6J9>grb!kOY$e!2Am*&*Qw;!i
zP<))i$98#C#TukvuyqrWhXpyEk9G1mj`T|jwi7v?NPMG8xI*A^ftLw&1JTC>aa|B&
ziD(nVQ9;ZhVwOCvWQ$b<tpqCwmJuu`SVxc|SWD1Cu%2KI!D;{;$PgYBxI|d0Vs$nL
zU2vels)+(O2%I1U^VoPQ!F+<b1k(r>5U@Fi2Jek1;*(^;MuE)&rwLps@U*~-0%r(p
z5I9%hbb)&b$rh(>U^QEiM?A9LWk;~$s-PybyW>JrBd|%}7J*9zjuSXl;5LEV1>PV;
zN41z-8FJhth|T1<u3VR;+v00yqE`!?BXEttwF1uyoGfs?z>NZ{38{-r4uo1V87uIV
zR4pN*ULF_oagRJU^Kpuh?G!jh;7oxF1uho2Okhf2i@?nSw+h@Xa6h50Bd8~sNid0^
z4FGS5geL`_5EuzOKnT&tbZ2u&N#JzCm*S*7C-8#6^8zmkRD^mFNv`oRQ*5}Mxp%j3
zpi7QVZC!m8emb&acx$$eyp6X_yiKspyiK$dc$;J=@iy5`;cbeY#@kdogSTmR7H`w-
z9NuQwdA!ZE3wWDl7x6aRF5zvCUB=s7yMniQb|r7~?P}f@*tNVZw5_}?vg>$TY<Kdu
z)^_rC#`f`c-9F3PZTkXmIr}<qPun+n`;vW^x3Ahi;qB)L)P=s=nVcxgV|kntSL1xR
z7Ux%WILE5TE6Es~_cq{nrbhgY#c@Cr&diqJe0M26OkRc$m6yxHTaDw`8l2=#!MEW}
zI7yw3-+acQ)lw|X+w}%;*oM|SaaOzsRQvEP`F{LLbO7t>L2x+)E{E|=_YrV8s@LMs
zumy*P>v3qf28V{LacDSCU&I-~C7dr`#%tsioJC*7x!N_HPxtH1`WZ;{8qS(u$ARLv
zAo-6V`MX%kevB8p_aMnna2EZmK_s%c95q>-Bt^|7r|6oBBpAIak*K5BFck$k9lEtx
zPDIn|63JkCqt8~+o63nvdUGOWNpJC)9FlLLw+>BK(c3E$H1v*SGLhbuOm)$_hh%r@
zz2$5<eKMrb=u?T*Dt$W9p`^m-v&oD#eJ+uDq|cW#+w=vWxutJ-ByxUqM@2rJegdWl
zS#mm^%v;kp%c)qpGm$c*yDBnWbfzMoO?MB?;nTM&^U}15yFrxF*<{X<w!@O$bWb96
zNcSc(Wpv+&8A1BVWJ01INTvzue8{%ag*ahN4_2fO>N`yGOJ?-xyA_F8`l-r1EPXFH
zfb)<qR3v=qFAmEl)6Wh|Y}3yT&ui1q56|+`FI1+w=`Y7+$wSg#smxu{FIHqQ>X#}L
z*YwNfOf~%q<_Pi{-Gq5{Xj+v1n$Nt{#d0#0{yN;LoYJYkQISxlzd1BrQom7|p{7ff
zDWLkzL^7oQb}|!De}|=zeyG2j%rVs8tIQ+R-;b^0hUBee&Yga{BF|30Q%;G~KS(CJ
z=^vJp1ocltHkrO(PBPQ)$H`~<gP}=r`e#FvEA=npL^AzhNFCD;${A()qh!{Y{@9I(
ZB!1~%Cey<7uLlt=kgJD$wJJ5(@_*S)Y#0Cl

literal 0
HcmV?d00001

diff --git a/Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle b/Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..67e7d83e9194d5e1ddc268c53ef5873b98871f01
GIT binary patch
literal 1225
zcmaJ>J#QL85WT%`Fkp-UTeecAxgtv8I!&acnw%9yNs)r$2%8)-hKRdM<N_&8Ty44e
zJNa#Sv%6qO0me@|UvFlI+4&Xy`5SuXv(cIARuA{n)#7H^+)w3Nvnt7p`b!BR_k=om
zy{P4SS=93%Qs+Xb&Yq&uxi3`mM%KlA0U?0@t^8Kha`tICuXP>@m41-lB{ZOmgbJ-b
z7LDrKlsC2j&WwCzfE;BA+A;!N8H1us5Lu>qEXr1+;;IH)r6)QwUlJr9burQ8lWz~6
zIY{e4Dch;V$f|vrJfpRq)OJJ}osfr+17pV-k<Ml5Qm{?O2S*a^NCI}Gn7t^W&}2t+
zWP&p4u?jh0Mko#t`4*?<VA(S2Gaa2795A8vbvHhDC1f--Y$Nt#k8_uXjK<ZLQDAt&
zV}+r@*i|^D1i5XU7nJT*o6%Nw*Rn+;@g@7=8vo~NA$r#YZ|8JK4ksz43TI!7YC8|T
zjpd?{d&kw`KOxaP@-&vCBWBFdvm<6q*Yk}g{IC_cFlH|xAp=X8w^tmB_H*-TU=v~j
z-|aI#)q=TP6jO;uFhQ87qPO|`kDoQDd~wU2V@McU3_=CVRSh#4YXBW8V2lk4qaLv9
zkYU88z$jCe`wTsXD+ZsT!*I<o2H*;OYy+f9@8rYJ)q2KVEq_x~skmO<$))KsM$bn_
z13VG#F$NaW0B>N3-i)Y=2Xl%>^f3u%bOno#@#?^O4eJ<xZMtk!$L$f*1+zvD7X_|-
iuo@_JyMS7UJBHd%3x#@n>w_*D72)x(u~$6Ny!;2V#C%Zz

literal 0
HcmV?d00001

diff --git a/Python/traceback.c b/Python/traceback.c
index a75b7833af4e05..a75812d2219245 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -413,10 +413,9 @@ _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f)
     return 0;
 }
 
+
 static int
-display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
-                                int margin_indent, const char *margin,
-                                int *truncation, PyObject **line)
+get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **lines)
 {
     int fd;
     int i;
@@ -428,13 +427,14 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
     PyObject *lineobj = NULL;
     PyObject *res;
     char buf[MAXPATHLEN+1];
-    int kind;
-    const void *data;
 
     /* open the file */
     if (filename == NULL)
         return 0;
 
+    if (lines == NULL)
+        return 0;
+
     /* Do not attempt to open things like <string> or <stdin> */
     assert(PyUnicode_Check(filename));
     if (PyUnicode_READ_CHAR(filename, 0) == '<') {
@@ -496,15 +496,39 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
     }
     Py_DECREF(binary);
 
-    /* get the line number lineno */
-    for (i = 0; i < lineno; i++) {
-        Py_XDECREF(lineobj);
+    /* get lines between lineno and end_lineno, inclusive */
+    PyObject *lines_accum = PyUnicode_FromString("");
+    if (!lines_accum) {
+        goto cleanup_fob;
+    }
+    for (i = 1; i <= end_lineno; i++) {
         lineobj = PyFile_GetLine(fob, -1);
         if (!lineobj) {
+            /* EOF or read error: clear it now so no exception is pending
+               for later C-API calls; the line is padded with "\n" below. */
             PyErr_Clear();
-            break;
         }
+        if (i >= lineno) {
+            if (!lineobj || !PyUnicode_Check(lineobj)) {
+                Py_XSETREF(lineobj, PyUnicode_FromString("\n"));
+                if (!lineobj) {
+                    goto cleanup_fob;
+                }
+            }
+            Py_SETREF(lines_accum, PyUnicode_Concat(lines_accum, lineobj));
+            if (!lines_accum) {
+                goto cleanup_fob;
+            }
+        }
+        /* PyFile_GetLine() returned a new reference; release it. */
+        Py_CLEAR(lineobj);
     }
+    *lines = Py_NewRef(lines_accum);
+cleanup_fob:
+    /* Also reached via goto with a line still held. */
+    Py_XDECREF(lineobj);
+    Py_XDECREF(lines_accum);
+    PyErr_Clear();
     res = PyObject_CallMethodNoArgs(fob, &_Py_ID(close));
     if (res) {
         Py_DECREF(res);
@@ -513,9 +528,25 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
         PyErr_Clear();
     }
     Py_DECREF(fob);
-    if (!lineobj || !PyUnicode_Check(lineobj)) {
+
+    return 0;
+}
+
+static int
+display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
+                                int margin_indent, const char *margin,
+                                int *truncation, PyObject **line)
+{
+    PyObject *lineobj = NULL;
+    int i;
+    int result;
+    int kind;
+    const void *data;
+
+    result = get_source_lines(filename, lineno, lineno, &lineobj);
+    if (result || lineobj == NULL) {
         Py_XDECREF(lineobj);
-        return -1;
+        return result;
     }
 
     if (line) {
@@ -562,10 +593,10 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
         goto error;
     }
 
-    Py_DECREF(lineobj);
+    Py_XDECREF(lineobj);
     return 0;
 error:
-    Py_DECREF(lineobj);
+    Py_XDECREF(lineobj);
     return -1;
 }
 
@@ -1356,4 +1387,3 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp,
 
     return NULL;
 }
-

From 2e63abc96106b7e0eaa43ce60afa1184a6ce551f Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Fri, 28 Jul 2023 18:02:15 -0700
Subject: [PATCH 02/14] update tests and traceback.py

---
 Lib/test/test_traceback.py |  90 +++++++++---
 Lib/traceback.py           | 285 ++++++++++++++++++++++++++++---------
 2 files changed, 288 insertions(+), 87 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index aa8405bd25d120..54092822eb9507 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -427,6 +427,7 @@ def f():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    if True: raise ValueError("basic caret tests")\n'
             '             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
@@ -445,6 +446,7 @@ def f_with_unicode():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f_with_unicode\n'
             '    if True: raise ValueError("Ĥellö Wörld")\n'
             '             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
@@ -462,6 +464,7 @@ def foo(a: THIS_DOES_NOT_EXIST ) -> int:
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f_with_type\n'
             '    def foo(a: THIS_DOES_NOT_EXIST ) -> int:\n'
             '               ^^^^^^^^^^^^^^^^^^^\n'
@@ -482,9 +485,14 @@ def f_with_multiline():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f_with_multiline\n'
             '    if True: raise ValueError(\n'
-            '             ^^^^^^^^^^^^^^^^^'
+            '             ^^^^^^^^^^^^^^^^^\n'
+            '        "error over multiple lines"\n'
+            '        ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
+            '    )\n'
+            '    ^'
         )
         result_lines = self.get_exception(f_with_multiline)
         self.assertEqual(result_lines, expected_f.splitlines())
@@ -513,13 +521,14 @@ def f_with_multiline():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_multiline\n'
             '    return compile(code, "?", "exec")\n'
-            '           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
+            '           ~~~~~~~^^^^^^^^^^^^^^^^^^^\n'
             '  File "?", line 7\n'
             '    foo(a, z\n'
             '           ^'
-            )
+        )
 
         result_lines = self.get_exception(f_with_multiline)
         self.assertEqual(result_lines, expected_f.splitlines())
@@ -538,9 +547,12 @@ def f_with_multiline():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_multiline\n'
             '    2 + 1 /\n'
-            '        ^^^'
+            '        ~~^\n'
+            '    0\n'
+            '    ~'
         )
         result_lines = self.get_exception(f_with_multiline)
         self.assertEqual(result_lines, expected_f.splitlines())
@@ -555,6 +567,7 @@ def f_with_binary_operator():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
             '    return 10 + divisor / 0 + 30\n'
             '                ~~~~~~~~^~~\n'
@@ -572,6 +585,7 @@ def f_with_binary_operator():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
             '    return 10 + áóí / 0 + 30\n'
             '                ~~~~^~~\n'
@@ -589,6 +603,7 @@ def f_with_binary_operator():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
             '    return 10 + divisor // 0 + 30\n'
             '                ~~~~~~~~^^~~\n'
@@ -624,6 +639,7 @@ def f_with_subscript():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
             "    return some_dict['x']['y']['z']\n"
             '           ~~~~~~~~~~~~~~~~~~~^^^^^\n'
@@ -641,6 +657,7 @@ def f_with_subscript():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
             "    return some_dict['ó']['á']['í']['beta']\n"
             '           ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^\n'
@@ -682,6 +699,7 @@ def test_traceback_specialization_with_syntax_error(self):
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{TESTFN}", line {lineno_f}, in <module>\n'
             "    1 $ 0 / 1 / 2\n"
             '    ^^^^^\n'
@@ -704,6 +722,7 @@ def test_traceback_very_long_line(self):
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{TESTFN}", line {lineno_f}, in <module>\n'
             f'    {source}\n'
             f'    {" "*len("if True: ") + "^"*256}\n'
@@ -721,6 +740,7 @@ def f_with_subscript():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+2}, in f_with_subscript\n'
             "    some_dict['x']['y']['z']\n"
             '    ~~~~~~~~~~~~~~~~~~~^^^^^\n'
@@ -740,6 +760,7 @@ def exc():
              f'  + Exception Group Traceback (most recent call last):\n'
              f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
              f'  |     callable()\n'
+             f'  |     ~~~~~~~~^^\n'
              f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 1}, in exc\n'
              f'  |     if True: raise ExceptionGroup("eg", [ValueError(1), TypeError(2)])\n'
              f'  |              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
@@ -805,6 +826,7 @@ def g(): pass
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_applydescs + 1}, in applydecs\n'
             '    @dec_error\n'
             '     ^^^^^^^^^\n'
@@ -823,6 +845,7 @@ class A: pass
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_applydescs_class + 1}, in applydecs_class\n'
             '    @dec_error\n'
             '     ^^^^^^^^^\n'
@@ -841,6 +864,7 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
             "    .method",
             "     ^^^^^^",
@@ -857,6 +881,7 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
             "    method",
         ]
@@ -872,6 +897,7 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
             "    . method",
             "      ^^^^^^",
@@ -887,6 +913,7 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
             "    ｗｉｄｔｈ",
         ]
@@ -903,6 +930,7 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
             "    raise ValueError(ｗｉｄｔｈ)",
         ]
@@ -921,9 +949,12 @@ def f():
             "Traceback (most recent call last):",
             f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
             "    callable()",
+            "    ~~~~~~~~^^",
             f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
             "    print(1, ｗｗｗ(",
-            "             ^^^^",
+            "             ~~~^",
+            "            ｔｈ))",
+            "            ^^^"
         ]
         self.assertEqual(actual, expected)
 
@@ -1072,12 +1106,16 @@ def f():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n'
             '    f()\n'
+            '    ~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ~^^\n'
             f'  File "{__file__}", line {lineno_f+1}, in f\n'
             '    f()\n'
+            '    ~^^\n'
             # XXX: The following line changes depending on whether the tests
             # are run through the interactive interpreter or with -m
             # It also varies depending on the platform (stack size)
@@ -1118,13 +1156,13 @@ def g(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             '  [Previous line repeated 7 more times]\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
@@ -1134,6 +1172,7 @@ def g(count=10):
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_g+7}, in _check_recursive_traceback_display\n'
             '    g()\n'
+            '    ~^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -1158,18 +1197,20 @@ def h(count=10):
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {lineno_h+7}, in _check_recursive_traceback_display\n'
             '    h()\n'
+            '    ~^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_h+2}, in h\n'
             '    return h(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             '  [Previous line repeated 7 more times]\n'
             f'  File "{__file__}", line {lineno_h+3}, in h\n'
             '    g()\n'
+            '    ~^^\n'
         )
         expected = (result_h + result_g).splitlines()
         actual = stderr_h.getvalue().splitlines()
@@ -1186,21 +1227,22 @@ def h(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
             'ValueError\n'
         )
         tb_line = (
             'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_g+77}, in _check_recursive_traceback_display\n'
+            f'  File "{__file__}", line {lineno_g+80}, in _check_recursive_traceback_display\n'
             '    g(traceback._RECURSIVE_CUTOFF)\n'
+            '    ~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -1217,13 +1259,13 @@ def h(count=10):
         result_g = (
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             f'  File "{__file__}", line {lineno_g+2}, in g\n'
             '    return g(count-1)\n'
-            '           ^^^^^^^^^^\n'
+            '           ~^^^^^^^^^\n'
             '  [Previous line repeated 1 more time]\n'
             f'  File "{__file__}", line {lineno_g+3}, in g\n'
             '    raise ValueError\n'
@@ -1231,8 +1273,9 @@ def h(count=10):
         )
         tb_line = (
             'Traceback (most recent call last):\n'
-            f'  File "{__file__}", line {lineno_g+108}, in _check_recursive_traceback_display\n'
+            f'  File "{__file__}", line {lineno_g+112}, in _check_recursive_traceback_display\n'
             '    g(traceback._RECURSIVE_CUTOFF + 1)\n'
+            '    ~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n'
         )
         expected = (tb_line + result_g).splitlines()
         actual = stderr_g.getvalue().splitlines()
@@ -1698,6 +1741,7 @@ def exc():
              f'  + Exception Group Traceback (most recent call last):\n'
              f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
              f'  |     exception_or_callable()\n'
+             f'  |     ~~~~~~~~~~~~~~~~~~~~~^^\n'
              f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 1}, in exc\n'
              f'  |     raise ExceptionGroup("eg", [ValueError(1), TypeError(2)])\n'
              f'  | ExceptionGroup: eg (2 sub-exceptions)\n'
@@ -1733,6 +1777,7 @@ def exc():
                     f'  + Exception Group Traceback (most recent call last):\n'
                     f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
                     f'  |     exception_or_callable()\n'
+                    f'  |     ~~~~~~~~~~~~~~~~~~~~~^^\n'
                     f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 5}, in exc\n'
                     f'  |     raise EG("eg2", [ValueError(3), TypeError(4)]) from e\n'
                     f'  | ExceptionGroup: eg2 (2 sub-exceptions)\n'
@@ -1784,6 +1829,7 @@ def exc():
              f'Traceback (most recent call last):\n'
              f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
              f'    exception_or_callable()\n'
+             f'    ~~~~~~~~~~~~~~~~~~~~~^^\n'
              f'  File "{__file__}", line {exc.__code__.co_firstlineno + 8}, in exc\n'
              f'    raise ImportError(5)\n'
              f'ImportError: 5\n')
@@ -1830,6 +1876,7 @@ def exc():
                     f'  + Exception Group Traceback (most recent call last):\n'
                     f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
                     f'  |     exception_or_callable()\n'
+                    f'  |     ~~~~~~~~~~~~~~~~~~~~~^^\n'
                     f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 11}, in exc\n'
                     f'  |     raise EG("top", [VE(5)])\n'
                     f'  | ExceptionGroup: top (1 sub-exception)\n'
@@ -1989,6 +2036,7 @@ def exc():
         expected = (f'  + Exception Group Traceback (most recent call last):\n'
                     f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
                     f'  |     exception_or_callable()\n'
+                    f'  |     ~~~~~~~~~~~~~~~~~~~~~^^\n'
                     f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 9}, in exc\n'
                     f'  |     raise ExceptionGroup("nested", excs)\n'
                     f'  | ExceptionGroup: nested (2 sub-exceptions)\n'
@@ -2040,6 +2088,7 @@ def exc():
         expected = (f'  + Exception Group Traceback (most recent call last):\n'
                     f'  |   File "{__file__}", line {self.callable_line}, in get_exception\n'
                     f'  |     exception_or_callable()\n'
+                    f'  |     ~~~~~~~~~~~~~~~~~~~~~^^\n'
                     f'  |   File "{__file__}", line {exc.__code__.co_firstlineno + 10}, in exc\n'
                     f'  |     raise ExceptionGroup("nested", excs)\n'
                     f'  | ExceptionGroup: nested (2 sub-exceptions)\n'
@@ -2864,6 +2913,7 @@ def test_exception_group_format(self):
                     f'      | Traceback (most recent call last):',
                     f'      |   File "{__file__}", line {lno_g+9}, in _get_exception_group',
                     f'      |     f()',
+                    f'      |     ~^^',
                     f'      |   File "{__file__}", line {lno_f+1}, in f',
                     f'      |     1/0',
                     f'      |     ~^~',
@@ -2872,6 +2922,7 @@ def test_exception_group_format(self):
                     f'      | Traceback (most recent call last):',
                     f'      |   File "{__file__}", line {lno_g+13}, in _get_exception_group',
                     f'      |     g(42)',
+                    f'      |     ~^^^^',
                     f'      |   File "{__file__}", line {lno_g+1}, in g',
                     f'      |     raise ValueError(v)',
                     f'      | ValueError: 42',
@@ -2880,6 +2931,7 @@ def test_exception_group_format(self):
                     f'    | Traceback (most recent call last):',
                     f'    |   File "{__file__}", line {lno_g+20}, in _get_exception_group',
                     f'    |     g(24)',
+                    f'    |     ~^^^^',
                     f'    |   File "{__file__}", line {lno_g+1}, in g',
                     f'    |     raise ValueError(v)',
                     f'    | ValueError: 24',
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 67941ff45988c2..edd766d4bb01d9 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -263,7 +263,7 @@ class FrameSummary:
     """
 
     __slots__ = ('filename', 'lineno', 'end_lineno', 'colno', 'end_colno',
-                 'name', '_line', 'locals')
+                 'name', '_line', '_line_dedented', 'locals')
 
     def __init__(self, filename, lineno, name, *, lookup_line=True,
             locals=None, line=None,
@@ -281,6 +281,7 @@ def __init__(self, filename, lineno, name, *, lookup_line=True,
         self.lineno = lineno
         self.name = name
         self._line = line
+        self._line_dedented = None
         if lookup_line:
             self.line
         self.locals = {k: _safe_string(v, 'local', func=repr)
@@ -323,9 +324,15 @@ def line(self):
         if self._line is None:
             if self.lineno is None:
                 return None
-            self._line = linecache.getline(self.filename, self.lineno)
-        return self._line.strip()
-
+            end_lineno = self.lineno if self.end_lineno is None else self.end_lineno
+            self._line = ""
+            for lineno in range(self.lineno, end_lineno + 1):
+                # treat errors and empty lines as the same
+                self._line += linecache.getline(self.filename, lineno).rstrip() + "\n"
+        if self._line_dedented is None:
+            if self._line is not None:
+                self._line_dedented = textwrap.dedent(self._line).rstrip()
+        return self._line_dedented
 
 def walk_stack(f):
     """Walk a stack yielding the frame and line number for each frame.
@@ -470,44 +477,105 @@ def format_frame_summary(self, frame_summary):
         row.append('  File "{}", line {}, in {}\n'.format(
             frame_summary.filename, frame_summary.lineno, frame_summary.name))
         if frame_summary.line:
-            stripped_line = frame_summary.line.strip()
-            row.append('    {}\n'.format(stripped_line))
-
-            orig_line_len = len(frame_summary._original_line)
-            frame_line_len = len(frame_summary.line.lstrip())
-            stripped_characters = orig_line_len - frame_line_len
             if (
-                frame_summary.colno is not None
-                and frame_summary.end_colno is not None
+                frame_summary.end_lineno is None or
+                frame_summary.colno is None or
+                frame_summary.end_colno is None
             ):
+                row.append(textwrap.indent(frame_summary.line, '    ') + "\n")
+            else:
+                all_lines_original = frame_summary._original_line.splitlines()
+                # character index of the start of the instruction
                 start_offset = _byte_offset_to_character_offset(
-                    frame_summary._original_line, frame_summary.colno) + 1
+                    all_lines_original[0], frame_summary.colno
+                )
+                # character index of the end of the instruction
                 end_offset = _byte_offset_to_character_offset(
-                    frame_summary._original_line, frame_summary.end_colno) + 1
-
-                anchors = None
+                    all_lines_original[-1], frame_summary.end_colno
+                )
+
+                all_lines = frame_summary.line.splitlines()
+                # adjust start/end offset based on dedent
+                dedent_characters = len(all_lines_original[0]) - len(all_lines[0])
+                start_offset -= dedent_characters
+                end_offset -= dedent_characters
+                start_offset = max(0, start_offset)
+                end_offset = max(0, end_offset)
+
+                # expression corresponding to the instruction so we can get anchors
+                segment = ""
+                # underline markers to be printed - start with `~` marker and replace with `^` later
+                markers = []
+
+                # Compute segment and initial markers
                 if frame_summary.lineno == frame_summary.end_lineno:
-                    with suppress(Exception):
-                        anchors = _extract_caret_anchors_from_line_segment(
-                            frame_summary._original_line[start_offset - 1:end_offset - 1]
-                        )
+                    segment = all_lines[0][start_offset:end_offset]
+                    markers.append(" " * start_offset + "~" * (end_offset - start_offset))
                 else:
-                    end_offset = stripped_characters + len(stripped_line)
-
-                # show indicators if primary char doesn't span the frame line
-                if end_offset - start_offset < len(stripped_line) or (
-                        anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
-                    row.append('    ')
-                    row.append(' ' * (start_offset - stripped_characters))
-
-                    if anchors:
-                        row.append(anchors.primary_char * (anchors.left_end_offset))
-                        row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
-                        row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
+                    segment = all_lines[0][start_offset:] + "\n"
+                    markers.append(" " * start_offset + "~" * (len(all_lines[0]) - start_offset))
+                    for lineno in range(1, len(all_lines) - 1):
+                        line = all_lines[lineno]
+                        segment += line + "\n"
+                        # don't underline leading spaces
+                        num_spaces = len(line) - len(line.lstrip())
+                        markers.append(" " * num_spaces + "~" * (len(line) - num_spaces))
+                    segment += all_lines[-1][:end_offset]
+                    num_spaces = len(all_lines[-1]) - len(all_lines[-1].lstrip())
+                    markers.append(" " * num_spaces + "~" * (end_offset - num_spaces))
+
+                anchors: Optional[_Anchors] = None
+                try:
+                    anchors = _extract_caret_anchors_from_line_segment(segment)
+                except AssertionError:
+                    pass
+
+                if anchors is None:
+                    if len(all_lines[0][:start_offset].lstrip()) == 0 and len(all_lines[-1][end_offset:].rstrip()) == 0:
+                        # do not use markers if there are no anchors and the primary char spans all lines
+                        markers = None
                     else:
-                        row.append('^' * (end_offset - start_offset))
-
-                    row.append('\n')
+                        # replace `~` markers with `^` where necessary
+                        markers = [marker.replace("~", "^") for marker in markers]
+                else:
+                    # make markers mutable
+                    markers = [list(marker) for marker in markers]
+
+                    # anchor positions do not take start_offset into account
+                    anchors_left_end_offset = anchors.left_end_offset
+                    anchors_right_start_offset = anchors.right_start_offset
+                    if anchors.left_end_lineno == 0:
+                        anchors_left_end_offset += start_offset
+                    if anchors.right_start_lineno == 0:
+                        anchors_right_start_offset += start_offset
+
+                    # Replace each `~` placeholder: secondary char between the two anchors, primary char elsewhere
+                    for line in range(len(markers)):
+                        for col in range(len(markers[line])):
+                            use_secondary = True
+                            if line < anchors.left_end_lineno:
+                                use_secondary = False
+                            elif line == anchors.left_end_lineno and col < anchors_left_end_offset:
+                                use_secondary = False
+                            elif (
+                                line == anchors.right_start_lineno
+                                and col >= anchors_right_start_offset
+                            ):
+                                use_secondary = False
+                            elif line > anchors.right_start_lineno:
+                                use_secondary = False
+                            if markers[line][col] == "~":
+                                markers[line][col] = anchors.secondary_char if use_secondary else anchors.primary_char
+
+                    # make markers into strings again
+                    markers = ["".join(marker) for marker in markers]
+
+                result = ""
+                for i in range(len(all_lines)):
+                    result += all_lines[i] + "\n"
+                    if markers is not None:
+                        result += markers[i] + "\n"
+                row.append(textwrap.indent(textwrap.dedent(result), '    '))
 
         if frame_summary.locals:
             for name, value in sorted(frame_summary.locals.items()):
@@ -571,7 +639,9 @@ def _byte_offset_to_character_offset(str, offset):
 _Anchors = collections.namedtuple(
     "_Anchors",
     [
+        "left_end_lineno",
         "left_end_offset",
+        "right_start_lineno",
         "right_start_offset",
         "primary_char",
         "secondary_char",
@@ -580,49 +650,128 @@ def _byte_offset_to_character_offset(str, offset):
 )
 
 def _extract_caret_anchors_from_line_segment(segment):
+    """
+    Given source code `segment` corresponding to a FrameSummary, determine:
+        - for binary ops, the location of the binary op
+        - for indexing and function calls, the location of the brackets.
+    `segment` is expected to be a valid Python expression.
+    """
     import ast
 
     try:
-        tree = ast.parse(segment)
+        # Without brackets, `segment` is parsed as a statement.
+        # We expect an expression, so wrap `segment` in
+        # brackets to handle multi-line expressions.
+        tree = ast.parse("(\n" + segment + "\n)")
     except SyntaxError:
         return None
 
     if len(tree.body) != 1:
         return None
 
-    normalize = lambda offset: _byte_offset_to_character_offset(segment, offset)
+    lines = segment.split("\n")
+
+    # get character index given byte offset
+    def normalize(lineno, offset):
+        return _byte_offset_to_character_offset(lines[lineno], offset)
+
+    # Gets the next valid character index in `lines`, if
+    # the current location is not valid. Handles empty lines.
+    def next_valid_char(lineno, col):
+        while lineno < len(lines) and col >= len(lines[lineno]):
+            col = 0
+            lineno += 1
+        assert lineno < len(lines) and col < len(lines[lineno])
+        return lineno, col
+
+    # Get the next valid character index in `lines`.
+    def increment(lineno, col):
+        col += 1
+        lineno, col = next_valid_char(lineno, col)
+        assert lineno < len(lines) and col < len(lines[lineno])
+        return lineno, col
+
+    # Get the first valid character position at or after the start of the next line
+    def nextline(lineno, col):
+        col = 0
+        lineno += 1
+        lineno, col = next_valid_char(lineno, col)
+        assert lineno < len(lines) and col < len(lines[lineno])
+        return lineno, col
+
+    # Get the next valid non-"\#" character that satisfies the `stop` predicate
+    def increment_until(lineno, col, stop):
+        while not stop(ch := lines[lineno][col]) or ch in "\\#":
+            if ch in "\\#":
+                lineno, col = nextline(lineno, col)
+            else:
+                lineno, col = increment(lineno, col)
+        return lineno, col
+
+    # Get the lineno/col position of the end of `expr`. If `force_valid` is True,
+    # forces the position to be a valid character (e.g. if the position is beyond the
+    # end of the line, move to the next line)
+    def setup_positions(expr, force_valid=True):
+        # -2 since end_lineno is 1-indexed and because we added an extra
+        # bracket to `segment` when calling ast.parse
+        lineno = expr.end_lineno - 2
+        col = normalize(lineno, expr.end_col_offset)
+        return next_valid_char(lineno, col) if force_valid else (lineno, col)
+
+
     statement = tree.body[0]
-    match statement:
-        case ast.Expr(expr):
-            match expr:
-                case ast.BinOp():
-                    operator_start = normalize(expr.left.end_col_offset)
-                    operator_end = normalize(expr.right.col_offset)
-                    operator_str = segment[operator_start:operator_end]
-                    operator_offset = len(operator_str) - len(operator_str.lstrip())
-
-                    left_anchor = expr.left.end_col_offset + operator_offset
-                    right_anchor = left_anchor + 1
-                    if (
-                        operator_offset + 1 < len(operator_str)
-                        and not operator_str[operator_offset + 1].isspace()
-                    ):
-                        right_anchor += 1
-
-                    while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch in ")#"):
-                        left_anchor += 1
-                        right_anchor += 1
-                    return _Anchors(normalize(left_anchor), normalize(right_anchor))
-                case ast.Subscript():
-                    left_anchor = normalize(expr.value.end_col_offset)
-                    right_anchor = normalize(expr.slice.end_col_offset + 1)
-                    while left_anchor < len(segment) and ((ch := segment[left_anchor]).isspace() or ch != "["):
-                        left_anchor += 1
-                    while right_anchor < len(segment) and ((ch := segment[right_anchor]).isspace() or ch != "]"):
-                        right_anchor += 1
-                    if right_anchor < len(segment):
-                        right_anchor += 1
-                    return _Anchors(left_anchor, right_anchor)
+    if isinstance(statement, ast.Expr):
+        expr = statement.value
+        if isinstance(expr, ast.BinOp):
+            # ast gives these locations for BinOp subexpressions
+            # ( left_expr ) + ( right_expr )
+            #   left^^^^^       right^^^^^
+            lineno, col = setup_positions(expr.left)
+
+            # First operator character is the first non-space character not in ")\\#"
+            lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')')
+
+            # binary op is 1 or 2 characters long, on the same line,
+            # before the right subexpression
+            right_col = col + 1
+            if (
+                right_col < len(lines[lineno])
+                and (
+                    # operator char should not be in the right subexpression
+                    expr.right.lineno - 2 > lineno or
+                    right_col < normalize(expr.right.lineno - 2, expr.right.col_offset)
+                )
+                and not (ch := lines[lineno][right_col]).isspace()
+                and ch not in "\\#"
+            ):
+                right_col += 1
+
+            # right_col can be invalid since it is exclusive
+            return _Anchors(lineno, col, lineno, right_col)
+        elif isinstance(expr, ast.Subscript):
+            # ast gives these locations for value and slice subexpressions
+            # ( value_expr ) [ slice_expr ]
+            #   value^^^^^     slice^^^^^
+            # subscript^^^^^^^^^^^^^^^^^^^^
+
+            # find left bracket
+            left_lineno, left_col = setup_positions(expr.value)
+            left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[')
+            # find right bracket (final character of expression)
+            right_lineno, right_col = setup_positions(expr, force_valid=False)
+            return _Anchors(left_lineno, left_col, right_lineno, right_col)
+        elif isinstance(expr, ast.Call):
+            # ast gives these locations for function call expressions
+            # ( func_expr ) (args, kwargs)
+            #   func^^^^^
+            # call^^^^^^^^^^^^^^^^^^^^^^^^
+
+            # find left bracket
+            left_lineno, left_col = setup_positions(expr.func)
+            left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(')
+            # find right bracket (final character of expression)
+            right_lineno, right_col = setup_positions(expr, force_valid=False)
+            return _Anchors(left_lineno, left_col, right_lineno, right_col)
 
     return None
 

From db094ed4c5469e188bb407f445c4f052121b0abe Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 2 Aug 2023 17:16:53 -0700
Subject: [PATCH 03/14] wip more updates to traceback.c

---
 Python/traceback.c | 212 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 170 insertions(+), 42 deletions(-)

diff --git a/Python/traceback.c b/Python/traceback.c
index a75812d2219245..380fa54dbed936 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -532,6 +532,33 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
     return 0;
 }
 
+/* Write one source line to `f`: the margin, then `indent` spaces, then
+ * `line` itself, then a newline.
+ * Returns 0 on success and -1 if `line` is NULL or any write fails. */
+static int
+_write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, int margin_indent, const char *margin) {
+    if (line == NULL) {
+        return -1;
+    }
+
+    if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
+        return -1;
+    }
+    /* Write some spaces before the line */
+    if (_Py_WriteIndent(indent, f) < 0) {
+        return -1;
+    }
+    /* finally display the line, using its str() form (Py_PRINT_RAW) */
+    if (PyFile_WriteObject(line, f, Py_PRINT_RAW) < 0) {
+        return -1;
+    }
+    if (PyFile_WriteString("\n", f) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
 static int
 display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
                                 int margin_indent, const char *margin,
@@ -558,7 +585,7 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
     data = PyUnicode_DATA(lineobj);
     for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        if (ch != ' ' && ch != '\t' && ch != '\014')
+        if (!IS_WHITESPACE(ch))
             break;
     }
     if (i) {
@@ -575,21 +602,7 @@ display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int
         *truncation = i - indent;
     }
 
-    if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
-        goto error;
-    }
-
-    /* Write some spaces before the line */
-    if (_Py_WriteIndent(indent, f) < 0) {
-        goto error;
-    }
-
-    /* finally display the line */
-    if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) {
-        goto error;
-    }
-
-    if (PyFile_WriteString("\n", f) < 0) {
+    if (_write_line_with_margin_and_indent(f, lineobj, indent, margin_indent, margin)) {
         goto error;
     }
 
@@ -627,7 +640,9 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
 
 static int
-extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+extract_anchors_from_expr(const char *segment_str, expr_ty expr,
+                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+                          Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
     switch (expr->kind) {
@@ -686,12 +701,16 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *lef
 }
 
 static int
-extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+extract_anchors_from_stmt(const char *segment_str, stmt_ty statement,
+                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+                          Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
     switch (statement->kind) {
         case Expr_kind: {
-            return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
+            return extract_anchors_from_expr(segment_str, statement->v.Expr.value,
+                                             left_anchor_lineno, right_anchor_lineno,
+                                             left_anchor_col, right_anchor_col,
                                              primary_error_char, secondary_error_char);
         }
         default:
@@ -700,17 +719,39 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t
 }
 
 static int
-extract_anchors_from_line(PyObject *filename, PyObject *line,
+extract_anchors_from_line(PyObject *filename, PyObject *lines,
+                          Py_ssize_t lineno, Py_ssize_t end_lineno,
                           Py_ssize_t start_offset, Py_ssize_t end_offset,
-                          Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
+                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+                          Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
     int res = -1;
     PyArena *arena = NULL;
-    PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
-    if (!segment) {
+    PyObject *segment = NULL;
+    PyObject *tmp;
+
+    *tmp = PyUnicode_Join(PyUnicode_FromString("\n"), lines);
+    if (!tmp) {
         goto done;
     }
+    Py_SETREF(segment, tmp);
+
+    Py_ssize_t num_lines = PyList_Size(lines);
+    PyObject *last_string = PyList_GET_ITEM(lines, num_lines - 1);
+    Py_ssize_t right_end_offset = PyUnicode_GET_LENGTH(last_string) - end_offset;
+    Py_ssize_t end_join_offset = PyUnicode_GET_LENGTH(segment) - right_end_offset;
+    tmp = PyUnicode_Substring(segment, 0, PyUnicode_GET_LENGTH(segment) - end_join_offset);
+    if (!tmp) {
+        goto done;
+    }
+    Py_SETREF(segment, tmp);
+
+    tmp = PyUnicode_Substring(segment, start_offset, PyUnicode_GET_LENGTH(segment));
+    if (!tmp) {
+        goto done;
+    }
+    Py_SETREF(segment, tmp);
 
     const char *segment_str = PyUnicode_AsUTF8(segment);
     if (!segment_str) {
@@ -736,7 +777,9 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
     assert(module->kind == Module_kind);
     if (asdl_seq_LEN(module->v.Module.body) == 1) {
         stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
-        res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
+        res = extract_anchors_from_stmt(segment_str, statement,
+                                        left_anchor_lineno, right_anchor_lineno,
+                                        left_anchor_col, right_anchor_col,
                                         primary_error_char, secondary_error_char);
     } else {
         res = 0;
@@ -821,21 +864,10 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     }
 
     int err = 0;
-
-    int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
-    PyObject* source_line = NULL;
-    int rc = display_source_line_with_margin(
-            f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
-            margin_indent, margin, &truncation, &source_line);
-    if (rc != 0 || !source_line) {
-        /* ignore errors since we can't report them, can we? */
-        err = ignore_source_errors();
-        goto done;
-    }
+    bool done_dedent = 0;
 
     int code_offset = tb->tb_lasti;
     PyCodeObject* code = _PyFrame_GetCode(frame->f_frame);
-    const Py_ssize_t source_line_len = PyUnicode_GET_LENGTH(source_line);
 
     int start_line;
     int end_line;
@@ -843,16 +875,92 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     int end_col_byte_offset;
     if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
                               &end_line, &end_col_byte_offset)) {
+        start_line = end_line = lineno;
+        start_col_byte_offset = -1;
+    }
+
+    if (start_line < 0) {
+        start_line = lineno;
+    }
+    if (end_line < 0) {
+        end_line = lineno;
+    }
+    PyObject* source_lines = NULL;
+    PyObject* source_lines_dedented = NULL;
+    Py_ssize_t num_source_lines;
+    PyObject* markers = NULL;
+    int rc = get_source_lines(filename, start_line, end_line, &source_lines);
+    if (rc != 0 || !source_lines) {
+        /* ignore errors since we can't report them, can we? */
+        err = ignore_source_errors();
+        goto done;
+    }
+
+    num_source_lines = PyList_Size(source_lines);
+    if (num_source_lines == 0) {
+        // error
+    }
+
+    // fix error lines (replace None's with "")
+    for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
+        PyObject* line = PyList_GET_ITEM(source_lines, i);
+        if (line == NULL || !PyUnicode_Check(line)) {
+            // check for errors
+            PyList_SetItem(source_lines, i, PyUnicode_FromString(""));
+        }
+    }
+
+    // dedent lines
+    int truncation = 0;
+    Py_ssize_t reference_lineno = 0;
+    for (;; ++truncation) {
+        PyObject* reference_line = PyList_GET_ITEM(source_lines, reference_lineno);
+        while (truncation >= PyUnicode_GET_LENGTH(reference_line)) {
+            ++reference_lineno;
+            if (reference_line >= num_source_lines) {
+                goto dedent_compute_end;
+            }
+            reference_line = PyList_GET_ITEM(source_lines, reference_lineno);
+        }
+        Py_UCS4 reference_ch = PyUnicode_READ_CHAR(reference_line, truncation);
+        if (!IS_WHITESPACE(reference_ch)) {
+            goto dedent_compute_end;
+        }
+        for (Py_ssize_t i = reference_line + 1; i < num_source_lines; ++i) {
+            PyObject* line = PyList_GET_ITEM(source_lines, i);
+            if (truncation < PyUnicode_GET_LENGTH(line)) {
+                Py_UCS4 ch = PyUnicode_READ_CHAR(line, truncation);
+                if (!IS_WHITESPACE(ch) || ch != reference_ch) {
+                    goto dedent_compute_end;
+                }
+            }
+        }
+    }
+dedent_compute_end:
+    source_lines_dedented = PyList_New(num_source_lines);
+    if (source_lines_dedented == NULL) {
         goto done;
     }
+    for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
+        PyObject* truncated_line;
+        PyObject* line = PyList_GET_ITEM(source_lines, i);
+        Py_ssize_t line_len = PyUnicode_GET_LENGTH(line);
+        if (truncation >= line_len) {
+            truncated_line = PyUnicode_FromString("");
+        } else {
+            truncated_line = PyUnicode_Substring(line, truncation, line_len);
+        }
+        PyList_SET_ITEM(source_lines_dedented_tmp, i, truncated_line);
+    }
+    done_dedent = 1;
 
-    if (start_line < 0 || end_line < 0
-        || start_col_byte_offset < 0
+    if (start_col_byte_offset < 0
         || end_col_byte_offset < 0)
     {
         goto done;
     }
 
+
     // When displaying errors, we will use the following generic structure:
     //
     //  ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
@@ -871,19 +979,22 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // spans the whole line.
 
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
-    assert(source_line);
-    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
+    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(source_lines, 0), start_col_byte_offset);
     if (start_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
     }
 
-    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
+    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(source_lines, num_source_lines - 1), end_col_byte_offset);
     if (end_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
     }
 
+    // adjust start/end offset based on dedent
+    start_offset = (start_offset < truncation) ? 0 : start_offset - truncation;
+    end_offset = (end_offset < truncation) ? 0 : end_offset - truncation;
+
     Py_ssize_t left_end_offset = -1;
     Py_ssize_t right_start_offset = -1;
 
@@ -936,7 +1047,24 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     }
 
 done:
-    Py_XDECREF(source_line);
+    if (source_lines_dedented != NULL && done_dedent) {
+        Py_ssize_t num_markers = 0;
+        if (markers != NULL) {
+            num_markers = PyList_Size(markers);
+        }
+        for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
+            PyObject* line = PyList_GET_ITEM(source_lines_dedented, i);
+            if (_write_line_with_margin_and_indent(f, line, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin)) {
+                continue;
+            }
+            if (i < num_markers) {
+                _write_line_with_margin_and_indent(f, PyList_GET_ITEM(markers, i), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin);
+            }
+        }
+    }
+    Py_XDECREF(source_lines);
+    Py_XDECREF(source_lines_dedented);
+    Py_XDECREF(markers);
     return err;
 }
 

From 5d8cefff378427ae200ee11c8ccd0c548eaace66 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Fri, 18 Aug 2023 09:44:12 -0700
Subject: [PATCH 04/14] wip more updates to traceback.c

---
 Python/traceback.c | 166 +++++++++++++++++++++++++++++----------------
 1 file changed, 108 insertions(+), 58 deletions(-)

diff --git a/Python/traceback.c b/Python/traceback.c
index 380fa54dbed936..abf4cb7d88f2d4 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -839,6 +839,105 @@ print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py
     return 0;
 }
 
+// C implementation of textwrap.dedent.
+// Returns a new reference to the dedented string, NULL on failure.
+// Sets `truncation` to the number of characters truncated.
+// In abnormal cases (errors, whitespace-only input), `truncation` is set to 0.
+static PyObject*
+dedent(PyObject *lines, Py_ssize_t *truncation) {
+    *truncation = 0;
+    PyObject *split = PyUnicode_Splitlines(lines, 0);
+    if (!split) {
+        return NULL;
+    }
+    // Replace whitespace only lines with empty lines
+    Py_ssize_t num_lines = PyList_Size(split);
+    assert(num_lines > 0);
+    for (Py_ssize_t i = 0; i < num_lines; i++) {
+        PyObject* line = PyList_GET_ITEM(split, i);
+        int kind = PyUnicode_KIND(line);
+        const void *data = PyUnicode_DATA(line);
+        bool has_non_ws = 0;
+        for (Py_ssize_t j = 0; j < PyUnicode_GET_LENGTH(line); j++) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+            if (!IS_WHITESPACE(ch)) {
+                has_non_ws = 1;
+                break;
+            }
+        }
+        if (!has_non_ws) {
+            PyObject *empty = PyUnicode_FromString("");
+            if (!empty) {
+                goto error;
+            }
+            PyList_SET_ITEM(split, i, empty);
+        }
+    }
+
+    // Find a reference line - the first non-empty line.
+    // It is guaranteed to have a non-whitespace character.
+    Py_ssize_t ref_lineno = 0;
+    for (; ref_lineno < num_lines; ref_lineno++) {
+        if (PyUnicode_GET_LENGTH(PyList_GET_ITEM(split, ref_lineno)) > 0) {
+            break;
+        }
+    }
+    if (ref_lineno == num_lines) {
+        // empty input
+        goto done;
+    }
+
+    // Compute the number of characters to dedent by.
+    // Increment `col` until either lines[ref_line][col] is non-ws,
+    // or there is another line i with lines[i][col] != lines[ref_line][col].
+    Py_ssize_t col = 0;
+    PyObject *ref_line = PyList_GET_ITEM(split, ref_lineno);
+    Py_ssize_t ref_line_len = PyUnicode_GET_LENGTH(ref_line);
+    for (; col < ref_line_len; col++) {
+        Py_UCS4 ref_ch = PyUnicode_READ_CHAR(ref_line, col);
+        if (!IS_WHITESPACE(ref_ch)) {
+            goto dedent_compute_end;
+        }
+        // every line before ref_line is empty
+        for (Py_ssize_t i = ref_line + 1; i < num_lines; i++) {
+            PyObject* line = PyList_GET_ITEM(split, i);
+            if (PyUnicode_GET_LENGTH(line) == 0) {
+                continue;
+            }
+            assert(col < PyUnicode_GET_LENGTH(line));
+            Py_UCS4 ch = PyUnicode_READ_CHAR(line, col);
+            if (ch != ref_ch) {
+                goto dedent_compute_end;
+            }
+        }
+    }
+dedent_compute_end:
+
+    // truncate strings
+    if (col == 0) {
+        goto done;
+    }
+    for (Py_ssize_t i = 0; i < num_lines; i++) {
+        PyObject* line = PyList_GET_ITEM(split, i);
+        Py_ssize_t line_len = PyUnicode_GET_LENGTH(line);
+        if (line_len == 0) {
+            continue;
+        }
+        assert(col < line_len);
+        PyObject* truncated_line = PyUnicode_Substring(line, col, line_len);
+        if (!truncated_line) {
+            goto error;
+        }
+        PyList_SET_ITEM(split, i, truncated_line);
+    }
+
+done:
+    return split;
+error:
+    Py_XDECREF(split);
+    return NULL;
+}
+
 static int
 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
                PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
@@ -864,7 +963,6 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     }
 
     int err = 0;
-    bool done_dedent = 0;
 
     int code_offset = tb->tb_lasti;
     PyCodeObject* code = _PyFrame_GetCode(frame->f_frame);
@@ -885,74 +983,26 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     if (end_line < 0) {
         end_line = lineno;
     }
-    PyObject* source_lines = NULL;
-    PyObject* source_lines_dedented = NULL;
+
+    PyObject* lines_original = NULL;
+    PyObject* lines = NULL;
     Py_ssize_t num_source_lines;
-    PyObject* markers = NULL;
-    int rc = get_source_lines(filename, start_line, end_line, &source_lines);
+    int rc = get_source_lines(filename, start_line, end_line, &lines_original);
     if (rc != 0 || !source_lines) {
         /* ignore errors since we can't report them, can we? */
         err = ignore_source_errors();
         goto done;
     }
 
-    num_source_lines = PyList_Size(source_lines);
-    if (num_source_lines == 0) {
-        // error
-    }
-
-    // fix error lines (replace None's with "")
-    for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
-        PyObject* line = PyList_GET_ITEM(source_lines, i);
-        if (line == NULL || !PyUnicode_Check(line)) {
-            // check for errors
-            PyList_SetItem(source_lines, i, PyUnicode_FromString(""));
-        }
-    }
-
-    // dedent lines
     int truncation = 0;
-    Py_ssize_t reference_lineno = 0;
-    for (;; ++truncation) {
-        PyObject* reference_line = PyList_GET_ITEM(source_lines, reference_lineno);
-        while (truncation >= PyUnicode_GET_LENGTH(reference_line)) {
-            ++reference_lineno;
-            if (reference_line >= num_source_lines) {
-                goto dedent_compute_end;
-            }
-            reference_line = PyList_GET_ITEM(source_lines, reference_lineno);
-        }
-        Py_UCS4 reference_ch = PyUnicode_READ_CHAR(reference_line, truncation);
-        if (!IS_WHITESPACE(reference_ch)) {
-            goto dedent_compute_end;
-        }
-        for (Py_ssize_t i = reference_line + 1; i < num_source_lines; ++i) {
-            PyObject* line = PyList_GET_ITEM(source_lines, i);
-            if (truncation < PyUnicode_GET_LENGTH(line)) {
-                Py_UCS4 ch = PyUnicode_READ_CHAR(line, truncation);
-                if (!IS_WHITESPACE(ch) || ch != reference_ch) {
-                    goto dedent_compute_end;
-                }
-            }
-        }
-    }
-dedent_compute_end:
-    source_lines_dedented = PyList_New(num_source_lines);
-    if (source_lines_dedented == NULL) {
+    lines = dedent(lines_original, &truncation);
+    if (!lines) {
         goto done;
     }
-    for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
-        PyObject* truncated_line;
-        PyObject* line = PyList_GET_ITEM(source_lines, i);
-        Py_ssize_t line_len = PyUnicode_GET_LENGTH(line);
-        if (truncation >= line_len) {
-            truncated_line = PyUnicode_FromString("");
-        } else {
-            truncated_line = PyUnicode_Substring(line, truncation, line_len);
-        }
-        PyList_SET_ITEM(source_lines_dedented_tmp, i, truncated_line);
+    PyObject *lines_split = PyUnicode_Splitlines(lines, 0);
+    if (!lines_split) {
+        goto done;
     }
-    done_dedent = 1;
 
     if (start_col_byte_offset < 0
         || end_col_byte_offset < 0)

From f35d975a35ca3b8be6d99d8996b0d37a88ead590 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Tue, 12 Sep 2023 17:28:20 -0700
Subject: [PATCH 05/14] wip done initial traceback.c implementation

---
 Lib/traceback.py   |  77 +++------
 Python/traceback.c | 416 +++++++++++++++++++++++++++++++--------------
 2 files changed, 312 insertions(+), 181 deletions(-)

diff --git a/Lib/traceback.py b/Lib/traceback.py
index edd766d4bb01d9..116785e4f99be0 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -502,27 +502,8 @@ def format_frame_summary(self, frame_summary):
                 start_offset = max(0, start_offset)
                 end_offset = max(0, end_offset)
 
-                # expression corresponding to the instruction so we can get anchors
-                segment = ""
-                # underline markers to be printed - start with `~` marker and replace with `^` later
-                markers = []
-
-                # Compute segment and initial markers
-                if frame_summary.lineno == frame_summary.end_lineno:
-                    segment = all_lines[0][start_offset:end_offset]
-                    markers.append(" " * start_offset + "~" * (end_offset - start_offset))
-                else:
-                    segment = all_lines[0][start_offset:] + "\n"
-                    markers.append(" " * start_offset + "~" * (len(all_lines[0]) - start_offset))
-                    for lineno in range(1, len(all_lines) - 1):
-                        line = all_lines[lineno]
-                        segment += line + "\n"
-                        # don't underline leading spaces
-                        num_spaces = len(line) - len(line.lstrip())
-                        markers.append(" " * num_spaces + "~" * (len(line) - num_spaces))
-                    segment += all_lines[-1][:end_offset]
-                    num_spaces = len(all_lines[-1]) - len(all_lines[-1].lstrip())
-                    markers.append(" " * num_spaces + "~" * (end_offset - num_spaces))
+                segment = "\n".join(all_lines)
+                segment = segment[start_offset:len(segment) - (len(all_lines[-1]) - end_offset)]
 
                 anchors: Optional[_Anchors] = None
                 try:
@@ -530,16 +511,10 @@ def format_frame_summary(self, frame_summary):
                 except AssertionError:
                     pass
 
-                if anchors is None:
-                    if len(all_lines[0][:start_offset].lstrip()) == 0 and len(all_lines[-1][end_offset:].rstrip()) == 0:
-                        # do not use markers if there are no anchors and the primary char spans all lines
-                        markers = None
-                    else:
-                        # replace `~` markers with `^` where necessary
-                        markers = [marker.replace("~", "^") for marker in markers]
-                else:
-                    # make markers mutable
-                    markers = [list(marker) for marker in markers]
+                carets = None
+                # only use carets if there are anchors or the carets do not span all lines
+                if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip():
+                    carets = []
 
                     # anchor positions do not take start_offset into account
                     anchors_left_end_offset = anchors.left_end_offset
@@ -549,32 +524,26 @@ def format_frame_summary(self, frame_summary):
                     if anchors.right_start_lineno == 0:
                         anchors_right_start_offset += start_offset
 
-                    # Turn `~` markers between anchors to primary/secondary characters (default, `~`, `^`)
-                    for line in range(len(markers)):
-                        for col in range(len(markers[line])):
-                            use_secondary = True
-                            if line < anchors.left_end_lineno:
-                                use_secondary = False
-                            elif line == anchors.left_end_lineno and col < anchors_left_end_offset:
-                                use_secondary = False
-                            elif (
-                                line == anchors.right_start_lineno
-                                and col >= anchors_right_start_offset
-                            ):
-                                use_secondary = False
-                            elif line > anchors.right_start_lineno:
-                                use_secondary = False
-                            if markers[line][col] == "~":
-                                markers[line][col] = anchors.secondary_char if use_secondary else anchors.primary_char
-
-                    # make markers into strings again
-                    markers = ["".join(marker) for marker in markers]
+                    for i in range(len(all_lines)):
+                        num_spaces = len(all_lines[i]) - len(all_lines[i].lstrip())
+                        caret_line = []
+                        for j in range(len(all_lines[i])):
+                            if j < num_spaces:
+                                caret_line.append(' ')
+                            elif (i == 0 and j < start_offset) or (i == len(all_lines) - 1 and j >= end_offset):
+                                caret_line.append(' ')
+                            elif (i > anchors.left_end_lineno or j >= anchors_left_end_offset) and (i < anchors.right_start_lineno or j < anchors_right_start_offset):
+                                caret_line.append(anchors.secondary_char)
+                            else:
+                                caret_line.append(anchors.primary_char)
+
+                    carets.append("".join(caret_line))
 
                 result = ""
                 for i in range(len(all_lines)):
                     result += all_lines[i] + "\n"
-                    if markers is not None:
-                        result += markers[i] + "\n"
+                    if carets is not None:
+                        result += carets[i] + "\n"
                 row.append(textwrap.indent(textwrap.dedent(result), '    '))
 
         if frame_summary.locals:
@@ -688,7 +657,6 @@ def next_valid_char(lineno, col):
     def increment(lineno, col):
         col += 1
         lineno, col = next_valid_char(lineno, col)
-        assert lineno < len(lines) and col < len(lines[lineno])
         return lineno, col
 
     # Get the next valid character at least on the next line
@@ -696,7 +664,6 @@ def nextline(lineno, col):
         col = 0
         lineno += 1
         lineno, col = next_valid_char(lineno, col)
-        assert lineno < len(lines) and col < len(lines[lineno])
         return lineno, col
 
     # Get the next valid non-"\#" character that satisfies the `stop` predicate
diff --git a/Python/traceback.c b/Python/traceback.c
index abf4cb7d88f2d4..5b59267104b45e 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -548,7 +548,7 @@ _write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, int
     }
 
     /* finally display the line */
-    if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) {
+    if (PyFile_WriteObject(line, f, Py_PRINT_RAW) < 0) {
         return -1;
     }
 
@@ -559,6 +559,8 @@ _write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, int
     return 0;
 }
 
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
+
 static int
 display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
                                 int margin_indent, const char *margin,
@@ -637,11 +639,91 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
  *  TypeError: 'NoneType' object is not subscriptable
  */
 
-#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
+// helper functions for anchor extraction
+const char *_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno)
+{
+    return PyUnicode_AsUTF8(PyList_GET_ITEM(segment_lines, lineno));
+}
+
+static int
+_next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
+{
+    const char *segment_str = NULL;
+    while (*lineno < PyList_GET_SIZE(segment_lines)) {
+        segment_str = _get_segment_str(segment_lines, *lineno);
+        if (!segment_str) {
+            return -1;
+        }
+        if (*offset < (Py_ssize_t)strlen(segment_str)) {
+            break;
+        }
+        *offset = 0;
+        ++*lineno;
+    }
+    assert(*lineno < PyList_GET_SIZE(segment_lines));
+    assert(segment_str);
+    assert(*offset < (Py_ssize_t)strlen(segment_str));
+    return 0;
+}
 
 static int
-extract_anchors_from_expr(const char *segment_str, expr_ty expr,
-                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+_increment_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
+{
+    ++*offset;
+    return _next_valid_offset(segment_lines, lineno, offset);
+}
+
+static int
+_nextline(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
+{
+    *offset = 0;
+    ++*lineno;
+    return _next_valid_offset(segment_lines, lineno, offset);
+}
+
+static int
+_increment_until(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset, int (*stop)(char))
+{
+    while (1) {
+        const char *segment_str = _get_segment_str(segment_lines, *lineno);
+        if (!segment_str) {
+            return -1;
+        }
+        char ch = segment_str[*offset];
+        // jump to next line if we encounter line break or comment
+        if (ch == '\\' || ch == '#') {
+            if (_nextline(segment_lines, lineno, offset)) {
+                return -1;
+            }
+        } else if (!stop(ch)) {
+            if (_increment_offset(segment_lines, lineno, offset)) {
+                return -1;
+            }
+        } else {
+            break;
+        }
+    }
+    return 0;
+}
+
+static int _is_op_char(char ch) {
+    if (!IS_WHITESPACE(ch) && ch != ')') {
+        return 1;
+    }
+    return 0;
+}
+
+static int _is_open_bracket_char(char ch) {
+    return ch == '[';
+}
+
+static int _is_open_paren_char(char ch) {
+    return ch == '(';
+}
+
+static int
+extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
+                          Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno,
                           Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
@@ -649,66 +731,88 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr,
         case BinOp_kind: {
             expr_ty left = expr->v.BinOp.left;
             expr_ty right = expr->v.BinOp.right;
-            for (int i = left->end_col_offset; i < right->col_offset; i++) {
-                if (IS_WHITESPACE(segment_str[i])) {
-                    continue;
-                }
-
-                *left_anchor = i;
-                *right_anchor = i + 1;
-
-                // Check whether if this a two-character operator (e.g //)
-                if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
-                    ++*right_anchor;
-                }
+            *left_anchor_lineno = left->end_lineno - 2;
+            *left_anchor_col = left->end_col_offset;
+            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+                return 0;
+            }
+            // keep going until the current char is not whitespace or ')'
+            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_op_char)) {
+                return 0;
+            }
+            // Check whether if this is a two-character operator (e.g. //)
+            *right_anchor_lineno = *left_anchor_lineno;
+            *right_anchor_col = *left_anchor_col + 1;
 
-                // Keep going if the current char is not ')'
-                if (i+1 < right->col_offset && (segment_str[i] == ')')) {
-                    continue;
+            const char *segment_str = _get_segment_str(segment_lines, *left_anchor_lineno);
+            if (!segment_str) {
+                return 0;
+            }
+            if (
+                *right_anchor_col < (Py_ssize_t) strlen(segment_str) &&
+                (
+                    // operator char should not be in the right subexpression
+                    right->lineno - 2 > *right_anchor_lineno ||
+                    *right_anchor_col < right->col_offset
+                )
+            ) {
+                char ch = segment_str[*right_anchor_col];
+                if (!IS_WHITESPACE(ch) && ch != '\\' && ch != '#') {
+                    ++*right_anchor_col;
                 }
-
-                // Set the error characters
-                *primary_error_char = "~";
-                *secondary_error_char = "^";
-                break;
             }
+            // Set the error characters
+            *primary_error_char = "~";
+            *secondary_error_char = "^";
             return 1;
         }
         case Subscript_kind: {
-            *left_anchor = expr->v.Subscript.value->end_col_offset;
-            *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
-            Py_ssize_t str_len = strlen(segment_str);
-
-            // Move right_anchor and left_anchor forward to the first non-whitespace character that is not ']' and '['
-            while (*left_anchor < str_len && (IS_WHITESPACE(segment_str[*left_anchor]) || segment_str[*left_anchor] != '[')) {
-                ++*left_anchor;
-            }
-            while (*right_anchor < str_len && (IS_WHITESPACE(segment_str[*right_anchor]) || segment_str[*right_anchor] != ']')) {
-                ++*right_anchor;
+            *left_anchor_lineno = expr->v.Subscript.value->end_lineno - 2;
+            *left_anchor_col = expr->v.Subscript.value->end_col_offset;
+            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+                return 0;
             }
-            if (*right_anchor < str_len){
-                *right_anchor += 1;
+            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_open_bracket_char)) {
+                return 0;
             }
+            *right_anchor_lineno = expr->end_lineno - 2;
+            *right_anchor_col = expr->end_col_offset;
 
             // Set the error characters
             *primary_error_char = "~";
             *secondary_error_char = "^";
             return 1;
         }
+        case Call_kind:
+            *left_anchor_lineno = expr->v.Call.func->end_lineno - 2;
+            *left_anchor_col = expr->v.Call.func->end_col_offset;
+            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+                return 0;
+            }
+            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_open_paren_char)) {
+                return 0;
+            }
+            *right_anchor_lineno = expr->end_lineno - 2;
+            *right_anchor_col = expr->end_col_offset;
+
+            // Set the error characters
+            *primary_error_char = "~";
+            *secondary_error_char = "^";
+            return 1;
         default:
             return 0;
     }
 }
 
 static int
-extract_anchors_from_stmt(const char *segment_str, stmt_ty statement,
-                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+extract_anchors_from_stmt(PyObject *segment_lines, stmt_ty statement,
+                          Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno,
                           Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
     switch (statement->kind) {
         case Expr_kind: {
-            return extract_anchors_from_expr(segment_str, statement->v.Expr.value,
+            return extract_anchors_from_expr(segment_lines, statement->v.Expr.value,
                                              left_anchor_lineno, right_anchor_lineno,
                                              left_anchor_col, right_anchor_col,
                                              primary_error_char, secondary_error_char);
@@ -720,34 +824,52 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement,
 
 static int
 extract_anchors_from_line(PyObject *filename, PyObject *lines,
-                          Py_ssize_t lineno, Py_ssize_t end_lineno,
                           Py_ssize_t start_offset, Py_ssize_t end_offset,
-                          Py_ssize_t *left_anchor_lineno, *right_anchor_lineno,
+                          Py_ssize_t *left_anchor_lineno, Py_ssize_t *right_anchor_lineno,
                           Py_ssize_t *left_anchor_col, Py_ssize_t *right_anchor_col,
                           char** primary_error_char, char** secondary_error_char)
 {
     int res = -1;
     PyArena *arena = NULL;
     PyObject *segment = NULL;
+    PyObject *segment_lines = NULL;
     PyObject *tmp;
 
-    *tmp = PyUnicode_Join(PyUnicode_FromString("\n"), lines);
+    segment = PyUnicode_FromString("\n");
+    if (!segment) {
+        goto done;
+    }
+
+    tmp = PyUnicode_Join(segment, lines);
     if (!tmp) {
         goto done;
     }
     Py_SETREF(segment, tmp);
 
+    // truncate segment
     Py_ssize_t num_lines = PyList_Size(lines);
     PyObject *last_string = PyList_GET_ITEM(lines, num_lines - 1);
     Py_ssize_t right_end_offset = PyUnicode_GET_LENGTH(last_string) - end_offset;
     Py_ssize_t end_join_offset = PyUnicode_GET_LENGTH(segment) - right_end_offset;
-    tmp = PyUnicode_Substring(segment, 0, PyUnicode_GET_LENGTH(segment) - end_join_offset);
+    tmp = PyUnicode_Substring(segment, start_offset, PyUnicode_GET_LENGTH(segment) - end_join_offset);
     if (!tmp) {
         goto done;
     }
     Py_SETREF(segment, tmp);
 
-    tmp = PyUnicode_Substring(segment, start_offset, PyUnicode_GET_LENGTH(segment));
+    segment_lines = PyUnicode_Splitlines(segment, 0);
+    if (!segment_lines) {
+        goto done;
+    }
+
+    // segment = "(\n" + segment + "\n)"
+    tmp = PyUnicode_Concat(PyUnicode_FromString("(\n"), segment);
+    if (!tmp) {
+        goto done;
+    }
+    Py_SETREF(segment, tmp);
+
+    tmp = PyUnicode_Concat(segment, PyUnicode_FromString("\n)"));
     if (!tmp) {
         goto done;
     }
@@ -777,7 +899,7 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
     assert(module->kind == Module_kind);
     if (asdl_seq_LEN(module->v.Module.body) == 1) {
         stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
-        res = extract_anchors_from_stmt(segment_str, statement,
+        res = extract_anchors_from_stmt(segment_lines, statement,
                                         left_anchor_lineno, right_anchor_lineno,
                                         left_anchor_col, right_anchor_col,
                                         primary_error_char, secondary_error_char);
@@ -789,13 +911,22 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
     if (res > 0) {
         // Normalize the AST offsets to byte offsets and adjust them with the
         // start of the actual line (instead of the source code segment).
-        assert(segment != NULL);
-        assert(*left_anchor >= 0);
-        assert(*right_anchor >= 0);
-        *left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor) + start_offset;
-        *right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor) + start_offset;
+        assert(segment_lines != NULL);
+        assert(*left_anchor_lineno >= 0);
+        assert(*left_anchor_col >= 0);
+        assert(*right_anchor_lineno >= 0);
+        assert(*right_anchor_col >= 0);
+        *left_anchor_col = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(segment_lines, *left_anchor_lineno), *left_anchor_col);
+        *right_anchor_col = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(segment_lines, *right_anchor_lineno), *right_anchor_col);
+        if (*left_anchor_lineno == 0) {
+            *left_anchor_col += start_offset;
+        }
+        if (*right_anchor_lineno == 0) {
+            *right_anchor_col += start_offset;
+        }
     }
     Py_XDECREF(segment);
+    Py_XDECREF(segment_lines);
     if (arena) {
         _PyArena_Free(arena);
     }
@@ -815,28 +946,58 @@ ignore_source_errors(void) {
     return 0;
 }
 
-static inline int
-print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
-                            Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
-                            const char *primary, const char *secondary) {
+static PyObject*
+compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize_t end_offset,
+                              Py_ssize_t left_end_lineno, Py_ssize_t right_start_lineno,
+                              Py_ssize_t left_end_offset, Py_ssize_t right_start_offset,
+                              const char *primary, const char *secondary)
+{
+    Py_ssize_t num_lines = PyList_Size(lines);
+    PyObject *carets = PyList_New(num_lines);
+    PyObject *caret_line = NULL;
+    if (!carets) {
+        goto error;
+    }
     int special_chars = (left_end_offset != -1 || right_start_offset != -1);
-    const char *str;
-    while (++offset <= end_offset) {
-        if (offset <= start_offset) {
-            str = " ";
-        } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
-            str = secondary;
-        } else {
-            str = primary;
+    for (Py_ssize_t i = 0; i < num_lines; i++) {
+        PyObject *line = PyList_GET_ITEM(lines, i);
+        Py_ssize_t len = (i == num_lines - 1) ? end_offset : PyUnicode_GET_LENGTH(line);
+        caret_line = PyList_New(len);
+        if (!caret_line) {
+            goto error;
         }
-        if (PyFile_WriteString(str, f) < 0) {
-            return -1;
+        int kind = PyUnicode_KIND(line);
+        const void *data = PyUnicode_DATA(line);
+        bool has_non_ws = 0;
+        for (Py_ssize_t j = 0; j < len; ++j) {
+            const char *ch = " ";
+            if (!has_non_ws) {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+                if (!IS_WHITESPACE(ch)) {
+                    has_non_ws = 1;
+                }
+            }
+            if (has_non_ws && (i > 0 || j >= left_end_offset) && (i < num_lines - 1 || j < right_start_offset)) {
+                ch = primary;
+            }
+            if (special_chars && left_end_lineno <= i && i <= right_start_lineno) {
+                if ((left_end_lineno < i || left_end_offset <= j) && (i < right_start_lineno || j < right_start_offset)) {
+                    ch = secondary;
+                }
+            }
+            PyObject *str = PyUnicode_FromString(ch);
+            if (!str) {
+                goto error;
+            }
+            PyList_SET_ITEM(caret_line, j, str);
         }
+        PyList_SET_ITEM(carets, i, caret_line);
     }
-    if (PyFile_WriteString("\n", f) < 0) {
-        return -1;
-    }
-    return 0;
+    return carets;
+error:
+    Py_XDECREF(carets);
+    Py_XDECREF(caret_line);
+    return NULL;
 }
 
 // C implementation of textwrap.dedent.
@@ -899,7 +1060,7 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
             goto dedent_compute_end;
         }
         // every line before ref_line is empty
-        for (Py_ssize_t i = ref_line + 1; i < num_lines; i++) {
+        for (Py_ssize_t i = ref_lineno + 1; i < num_lines; i++) {
             PyObject* line = PyList_GET_ITEM(split, i);
             if (PyUnicode_GET_LENGTH(line) == 0) {
                 continue;
@@ -938,6 +1099,19 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
     return NULL;
 }
 
+static int  // 1 if `line` holds only whitespace (or is empty), else 0
+_is_all_whitespace(PyObject *line)  // `line` is read through the PyUnicode_* macros
+{
+    int kind = PyUnicode_KIND(line);
+    const void *data = PyUnicode_DATA(line);
+    for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(line); i++) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        if (!IS_WHITESPACE(ch))  // IS_WHITESPACE accepts only ' ', '\t' and '\f'
+            return 0;
+    }
+    return 1;  // no non-whitespace character seen (also the empty-string case)
+}
+
 static int
 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
                PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
@@ -986,23 +1160,26 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
 
     PyObject* lines_original = NULL;
     PyObject* lines = NULL;
-    Py_ssize_t num_source_lines;
+    PyObject* lines_split = NULL;
+    PyObject* carets = NULL;
+    Py_ssize_t num_lines = 0;
     int rc = get_source_lines(filename, start_line, end_line, &lines_original);
-    if (rc != 0 || !source_lines) {
+    if (rc || !lines_original) {
         /* ignore errors since we can't report them, can we? */
         err = ignore_source_errors();
         goto done;
     }
 
-    int truncation = 0;
+    Py_ssize_t truncation = 0;
     lines = dedent(lines_original, &truncation);
     if (!lines) {
         goto done;
     }
-    PyObject *lines_split = PyUnicode_Splitlines(lines, 0);
+    lines_split = PyUnicode_Splitlines(lines, 0);
     if (!lines_split) {
         goto done;
     }
+    num_lines = PyList_Size(lines_split);
 
     if (start_col_byte_offset < 0
         || end_col_byte_offset < 0)
@@ -1029,13 +1206,13 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // spans the whole line.
 
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
-    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(source_lines, 0), start_col_byte_offset);
+    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(lines_split, 0), start_col_byte_offset);
     if (start_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
     }
 
-    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(source_lines, num_source_lines - 1), end_col_byte_offset);
+    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(lines_split, num_lines - 1), end_col_byte_offset);
     if (end_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
@@ -1045,76 +1222,63 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     start_offset = (start_offset < truncation) ? 0 : start_offset - truncation;
     end_offset = (end_offset < truncation) ? 0 : end_offset - truncation;
 
+    Py_ssize_t left_end_lineno = -1;
     Py_ssize_t left_end_offset = -1;
+    Py_ssize_t right_start_lineno = -1;
     Py_ssize_t right_start_offset = -1;
 
     char *primary_error_char = "^";
     char *secondary_error_char = primary_error_char;
 
-    if (start_line == end_line) {
-        int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
-                                            &left_end_offset, &right_start_offset,
-                                            &primary_error_char, &secondary_error_char);
-        if (res < 0 && ignore_source_errors() < 0) {
-            goto done;
-        }
-    }
-    else {
-        // If this is a multi-line expression, then we will highlight until
-        // the last non-whitespace character.
-        const char *source_line_str = PyUnicode_AsUTF8(source_line);
-        if (!source_line_str) {
-            goto done;
-        }
-
-        Py_ssize_t i = source_line_len;
-        while (--i >= 0) {
-            if (!IS_WHITESPACE(source_line_str[i])) {
-                break;
-            }
-        }
-
-        end_offset = i + 1;
-    }
-
-    // Elide indicators if primary char spans the frame line
-    Py_ssize_t stripped_line_len = source_line_len - truncation - _TRACEBACK_SOURCE_LINE_INDENT;
-    bool has_secondary_ranges = (left_end_offset != -1 || right_start_offset != -1);
-    if (end_offset - start_offset == stripped_line_len && !has_secondary_ranges) {
+    res = extract_anchors_from_line(filename, lines_split, start_offset, end_offset,
+                                        &left_end_lineno, &right_start_lineno,
+                                        &left_end_offset, &right_start_offset,
+                                        &primary_error_char, &secondary_error_char);
+    if (res < 0 && ignore_source_errors() < 0) {
         goto done;
     }
 
-    if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
-        err = -1;
-        goto done;
+    // Elide indicators if primary char spans the frame line
+    if (res == 0) {
+        PyObject *tmp = PyUnicode_Substring(PyList_GET_ITEM(lines_split, 0), 0, start_offset);
+        int before_start_empty = tmp && _is_all_whitespace(tmp);
+        Py_XDECREF(tmp);
+        PyObject *last_line = PyList_GET_ITEM(lines_split, num_lines - 1);
+        tmp = PyUnicode_Substring(last_line, end_offset, PyUnicode_GET_LENGTH(last_line));
+        int after_end_empty = tmp && _is_all_whitespace(tmp);
+        Py_XDECREF(tmp);
+        if (before_start_empty && after_end_empty) {
+            goto done;
+        }
     }
 
-    if (print_error_location_carets(f, truncation, start_offset, end_offset,
-                                    right_start_offset, left_end_offset,
-                                    primary_error_char, secondary_error_char) < 0) {
-        err = -1;
-        goto done;
-    }
+    carets = compute_error_location_carets(lines_split, start_offset, end_offset,
+                                            left_end_lineno, right_start_lineno,
+                                            left_end_offset, right_start_offset,
+                                            primary_error_char, secondary_error_char);
 
 done:
-    if (source_lines_dedented != NULL && done_dedent) {
-        Py_ssize_t num_markers = 0;
-        if (markers != NULL) {
-            num_markers = PyList_Size(markers);
+    if (lines_split != NULL) {
+        Py_ssize_t num_carets = 0;
+        if (carets != NULL) {
+            num_carets = PyList_Size(carets);
         }
-        for (Py_ssize_t i = 0; i < num_source_lines; ++i) {
-            PyObject* line = PyList_GET_ITEM(source_lines_dedented, i);
+        for (Py_ssize_t i = 0; i < num_lines; ++i) {
+            PyObject* line = PyList_GET_ITEM(lines_split, i);
             if (_write_line_with_margin_and_indent(f, line, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin)) {
-                continue;
+                break;
             }
-            if (i < num_markers) {
-                _write_line_with_margin_and_indent(f, PyList_GET_ITEM(markers, i), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin);
+            if (i < num_carets) {
+                if (_write_line_with_margin_and_indent(f, PyList_GET_ITEM(carets, i), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin)) {
+                    break;
+                }
             }
         }
     }
-    Py_XDECREF(source_lines);
-    Py_XDECREF(source_lines_dedented);
-    Py_XDECREF(markers);
+    Py_XDECREF(lines_original);
+    Py_XDECREF(lines);
+    Py_XDECREF(lines_split);
+    Py_XDECREF(carets);
     return err;
 }
 

From 9d02a5cbac4c4096bc33cacc6fb334fa5ed6e5f8 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Fri, 15 Sep 2023 14:21:03 -0700
Subject: [PATCH 06/14] initial review

---
 Lib/test/test_traceback.py |   1 -
 Lib/traceback.py           | 181 ++++++++++++-----------
 Python/traceback.c         | 284 ++++++++++++++++++++++---------------
 3 files changed, 271 insertions(+), 195 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 54092822eb9507..bc66215eedb4ae 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -1030,7 +1030,6 @@ def check_traceback_format(self, cleanup_func=None):
         # Make sure that Python and the traceback module format the same thing
         print(traceback_fmt)
         print(python_fmt)
-        breakpoint()
         self.assertEqual(traceback_fmt, python_fmt)
         # Now verify the _tb func output
         self.assertEqual(tbstderr.getvalue(), tbfile.getvalue())
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 116785e4f99be0..dd19cfff208fd0 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -484,7 +484,9 @@ def format_frame_summary(self, frame_summary):
             ):
                 row.append(textwrap.indent(frame_summary.line, '    ') + "\n")
             else:
-                all_lines_original = frame_summary._original_line.splitlines()
+                all_lines_original = frame_summary._original_line.splitlines()[
+                    :frame_summary.end_lineno - frame_summary.lineno + 1
+                ]
                 # character index of the start of the instruction
                 start_offset = _byte_offset_to_character_offset(
                     all_lines_original[0], frame_summary.colno
@@ -494,7 +496,9 @@ def format_frame_summary(self, frame_summary):
                     all_lines_original[-1], frame_summary.end_colno
                 )
 
-                all_lines = frame_summary.line.splitlines()
+                all_lines = frame_summary.line.splitlines()[
+                    :frame_summary.end_lineno - frame_summary.lineno + 1
+                ]
                 # adjust start/end offset based on dedent
                 dedent_characters = len(all_lines_original[0]) - len(all_lines[0])
                 start_offset -= dedent_characters
@@ -516,35 +520,48 @@ def format_frame_summary(self, frame_summary):
                 if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip():
                     carets = []
 
-                    # anchor positions do not take start_offset into account
-                    anchors_left_end_offset = anchors.left_end_offset
-                    anchors_right_start_offset = anchors.right_start_offset
-                    if anchors.left_end_lineno == 0:
-                        anchors_left_end_offset += start_offset
-                    if anchors.right_start_lineno == 0:
-                        anchors_right_start_offset += start_offset
-
-                    for i in range(len(all_lines)):
-                        num_spaces = len(all_lines[i]) - len(all_lines[i].lstrip())
+                    anchors_left_end_offset = 0
+                    anchors_right_start_offset = 0
+                    primary_char = "^"
+                    secondary_char = "^"
+                    if anchors:
+                        anchors_left_end_offset = anchors.left_end_offset
+                        anchors_right_start_offset = anchors.right_start_offset
+                        # anchor positions do not take start_offset into account
+                        if anchors.left_end_lineno == 0:
+                            anchors_left_end_offset += start_offset
+                        if anchors.right_start_lineno == 0:
+                            anchors_right_start_offset += start_offset
+                        primary_char = anchors.primary_char
+                        secondary_char = anchors.secondary_char
+
+                    for lineno in range(len(all_lines)):
+                        num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip())
                         caret_line = []
-                        for j in range(len(all_lines[i])):
-                            if j < num_spaces:
-                                caret_line.append(' ')
-                            elif (i == 0 and j < start_offset) or (i == len(all_lines) - 1 and j >= end_offset):
+                        for col in range(len(all_lines[lineno])):
+                            if col < num_spaces or (
+                                lineno == 0 and col < start_offset
+                            ) or (
+                                lineno == len(all_lines) - 1 and col >= end_offset
+                            ):
                                 caret_line.append(' ')
-                            elif (i > anchors.left_end_lineno or j >= anchors_left_end_offset) and (i < anchors.right_start_lineno or j < anchors_right_start_offset):
-                                caret_line.append(anchors.secondary_char)
+                            elif anchors and (
+                                lineno > anchors.left_end_lineno or col >= anchors_left_end_offset
+                            ) and (
+                                lineno < anchors.right_start_lineno or col < anchors_right_start_offset
+                            ):
+                                caret_line.append(secondary_char)
                             else:
-                                caret_line.append(anchors.primary_char)
+                                caret_line.append(primary_char)
 
-                    carets.append("".join(caret_line))
+                        carets.append("".join(caret_line))
 
-                result = ""
+                result = []
                 for i in range(len(all_lines)):
-                    result += all_lines[i] + "\n"
+                    result.append(all_lines[i] + "\n")
                     if carets is not None:
-                        result += carets[i] + "\n"
-                row.append(textwrap.indent(textwrap.dedent(result), '    '))
+                        result.append(carets[i] + "\n")
+                row.append(textwrap.indent(textwrap.dedent("".join(result)), '    '))
 
         if frame_summary.locals:
             for name, value in sorted(frame_summary.locals.items()):
@@ -638,7 +655,7 @@ def _extract_caret_anchors_from_line_segment(segment):
     if len(tree.body) != 1:
         return None
 
-    lines = segment.split("\n")
+    lines = segment.splitlines()
 
     # get character index given byte offset
     def normalize(lineno, offset):
@@ -668,11 +685,14 @@ def nextline(lineno, col):
 
     # Get the next valid non-"\#" character that satisfies the `stop` predicate
     def increment_until(lineno, col, stop):
-        while not stop(ch := lines[lineno][col]) or ch in "\\#":
+        while True:  # left only through the `break` below
+            ch = lines[lineno][col]  # character currently under the cursor
             if ch in "\\#":
                 lineno, col = nextline(lineno, col)
-            else:
+            elif not stop(ch):  # rejected by the predicate: step forward with increment()
                 lineno, col = increment(lineno, col)
+            else:  # ch is neither a backslash nor "#", and stop(ch) is true
+                break
         return lineno, col
 
     # Get the lineno/col position of the end of `expr`. If `force_valid` is True,
@@ -680,65 +700,66 @@ def increment_until(lineno, col, stop):
     # end of the line, move to the next line)
     def setup_positions(expr, force_valid=True):
         # -2 since end_lineno is 1-indexed and because we added an extra
-        # bracket to `segment` when calling ast.parse
+        # bracket + newline to `segment` when calling ast.parse
         lineno = expr.end_lineno - 2
         col = normalize(lineno, expr.end_col_offset)
         return next_valid_char(lineno, col) if force_valid else (lineno, col)
 
 
     statement = tree.body[0]
-    if isinstance(statement, ast.Expr):
-        expr = statement.value
-        if isinstance(expr, ast.BinOp):
-            # ast gives these locations for BinOp subexpressions
-            # ( left_expr ) + ( right_expr )
-            #   left^^^^^       right^^^^^
-            lineno, col = setup_positions(expr.left)
-
-            # First operator character is the first non-space character not in ")\\#"
-            lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')')
-
-            # binary op is 1 or 2 characters long, on the same line,
-            # before the right subexpression
-            right_col = col + 1
-            if (
-                right_col < len(lines[lineno])
-                and (
-                    # operator char should not be in the right subexpression
-                    expr.right.lineno - 2 > lineno or
-                    right_col < normalize(expr.right.lineno - 2, expr.right.col_offset)
-                )
-                and not (ch := lines[lineno][right_col]).isspace()
-                and ch not in "\\#"
-            ):
-                right_col += 1
-
-            # right_col can be invalid since it is exclusive
-            return _Anchors(lineno, col, lineno, right_col)
-        elif isinstance(expr, ast.Subscript):
-            # ast gives these locations for value and slice subexpressions
-            # ( value_expr ) [ slice_expr ]
-            #   value^^^^^     slice^^^^^
-            # subscript^^^^^^^^^^^^^^^^^^^^
-
-            # find left bracket
-            left_lineno, left_col = setup_positions(expr.value)
-            left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[')
-            # find right bracket (final character of expression)
-            right_lineno, right_col = setup_positions(expr, force_valid=False)
-            return _Anchors(left_lineno, left_col, right_lineno, right_col)
-        elif isinstance(expr, ast.Call):
-            # ast gives these locations for function call expressions
-            # ( func_expr ) (args, kwargs)
-            #   func^^^^^
-            # call^^^^^^^^^^^^^^^^^^^^^^^^
-
-            # find left bracket
-            left_lineno, left_col = setup_positions(expr.func)
-            left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(')
-            # find right bracket (final character of expression)
-            right_lineno, right_col = setup_positions(expr, force_valid=False)
-            return _Anchors(left_lineno, left_col, right_lineno, right_col)
+    match statement:
+        case ast.Expr(expr):
+            match expr:
+                case ast.BinOp():
+                    # ast gives these locations for BinOp subexpressions
+                    # ( left_expr ) + ( right_expr )
+                    #   left^^^^^       right^^^^^
+                    lineno, col = setup_positions(expr.left)
+
+                    # First operator character is the first non-space/')' character
+                    lineno, col = increment_until(lineno, col, lambda x: not x.isspace() and x != ')')
+
+                    # binary op is 1 or 2 characters long, on the same line,
+                    # before the right subexpression
+                    right_col = col + 1
+                    if (
+                        right_col < len(lines[lineno])
+                        and (
+                            # operator char should not be in the right subexpression
+                            expr.right.lineno - 2 > lineno or
+                            right_col < normalize(expr.right.lineno - 2, expr.right.col_offset)
+                        )
+                        and not (ch := lines[lineno][right_col]).isspace()
+                        and ch not in "\\#"
+                    ):
+                        right_col += 1
+
+                    # right_col can be invalid since it is exclusive
+                    return _Anchors(lineno, col, lineno, right_col)
+                case ast.Subscript():
+                    # ast gives these locations for value and slice subexpressions
+                    # ( value_expr ) [ slice_expr ]
+                    #   value^^^^^     slice^^^^^
+                    # subscript^^^^^^^^^^^^^^^^^^^^
+
+                    # find left bracket
+                    left_lineno, left_col = setup_positions(expr.value)
+                    left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '[')
+                    # find right bracket (final character of expression)
+                    right_lineno, right_col = setup_positions(expr, force_valid=False)
+                    return _Anchors(left_lineno, left_col, right_lineno, right_col)
+                case ast.Call():
+                    # ast gives these locations for function call expressions
+                    # ( func_expr ) (args, kwargs)
+                    #   func^^^^^
+                    # call^^^^^^^^^^^^^^^^^^^^^^^^
+
+                    # find left bracket
+                    left_lineno, left_col = setup_positions(expr.func)
+                    left_lineno, left_col = increment_until(left_lineno, left_col, lambda x: x == '(')
+                    # find right bracket (final character of expression)
+                    right_lineno, right_col = setup_positions(expr, force_valid=False)
+                    return _Anchors(left_lineno, left_col, right_lineno, right_col)
 
     return None
 
diff --git a/Python/traceback.c b/Python/traceback.c
index 5b59267104b45e..519f32534544be 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -413,6 +413,17 @@ _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f)
     return 0;
 }
 
+static PyObject*  // result of PyUnicode_Join(); NULL if the separator cannot be created
+join_string_list(const char *join, PyObject* seq)  // `join`: separator text, `seq`: items to join
+{
+    PyObject *separator = PyUnicode_FromString(join);
+    if (!separator) {
+        return NULL;
+    }
+    PyObject *result = PyUnicode_Join(separator, seq);
+    Py_DECREF(separator);  // the temporary separator is needed only for the join call
+    return result;  // returned as-is; a NULL from PyUnicode_Join() is not handled here
+}
 
 static int
 get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **lines)
@@ -497,7 +508,7 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
     Py_DECREF(binary);
 
     /* get lines between lineno and end_lineno, inclusive */
-    PyObject *lines_accum = PyUnicode_FromString("");
+    PyObject *lines_accum = PyList_New(end_lineno - lineno + 1);
     if (!lines_accum) {
         goto cleanup_fob;
     }
@@ -510,13 +521,10 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
                     goto cleanup_fob;
                 }
             }
-            Py_SETREF(lines_accum, PyUnicode_Concat(lines_accum, lineobj));
-            if (!lines_accum) {
-                goto cleanup_fob;
-            }
+            PyList_SET_ITEM(lines_accum, i - lineno, lineobj);
         }
     }
-    *lines = Py_NewRef(lines_accum);
+    *lines = join_string_list("", lines_accum);
 cleanup_fob:
     Py_XDECREF(lines_accum);
     PyErr_Clear();
@@ -533,7 +541,9 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
 }
 
 static int
-_write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, int margin_indent, const char *margin) {
+_write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent,
+                                   int margin_indent, const char *margin)
+{
     if (line == NULL) {
         return -1;
     }
@@ -562,7 +572,8 @@ _write_line_with_margin_and_indent(PyObject *f, PyObject *line, int indent, int
 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
 
 static int
-display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
+display_source_line_with_margin(PyObject *f, PyObject *filename,
+                                int lineno, int indent,
                                 int margin_indent, const char *margin,
                                 int *truncation, PyObject **line)
 {
@@ -640,21 +651,22 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
  */
 
 // helper functions for anchor extraction
-const char *_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno)
+const char *_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno, Py_ssize_t *size)
 {
-    return PyUnicode_AsUTF8(PyList_GET_ITEM(segment_lines, lineno));
+    return PyUnicode_AsUTF8AndSize(PyList_GET_ITEM(segment_lines, lineno), size);  // UTF-8 text of item `lineno`; length goes to *size
 }
 
 static int
 _next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
 {
+    Py_ssize_t str_len = 0;
     const char *segment_str = NULL;
     while (*lineno < PyList_GET_SIZE(segment_lines)) {
-        segment_str = _get_segment_str(segment_lines, *lineno);
+        segment_str = _get_segment_str(segment_lines, *lineno, &str_len);
         if (!segment_str) {
             return -1;
         }
-        if (*offset < (Py_ssize_t)strlen(segment_str)) {
+        if (*offset < str_len) {
             break;
         }
         *offset = 0;
@@ -662,7 +674,7 @@ _next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offs
     }
     assert(*lineno < PyList_GET_SIZE(segment_lines));
     assert(segment_str);
-    assert(*offset < (Py_ssize_t)strlen(segment_str));
+    assert(*offset < str_len);
     return 0;
 }
 
@@ -682,11 +694,13 @@ _nextline(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
 }
 
 static int
-_increment_until(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset, int (*stop)(char))
+_increment_until(PyObject *segment_lines, Py_ssize_t *lineno,
+                 Py_ssize_t *offset, int (*stop)(char))
 {
     while (1) {
-        const char *segment_str = _get_segment_str(segment_lines, *lineno);
-        if (!segment_str) {
+        Py_ssize_t str_len;
+        const char *segment_str = _get_segment_str(segment_lines, *lineno, &str_len);
+        if (!segment_str || *offset >= str_len) {
             return -1;
         }
         char ch = segment_str[*offset];
@@ -706,18 +720,24 @@ _increment_until(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset
     return 0;
 }
 
-static int _is_op_char(char ch) {
+static int
+_is_op_char(char ch)  // 1 unless ch is IS_WHITESPACE or ')'; passed as `stop` to _increment_until
+{
     if (!IS_WHITESPACE(ch) && ch != ')') {
         return 1;
     }
     return 0;
 }
 
-static int _is_open_bracket_char(char ch) {
+static int
+_is_open_bracket_char(char ch)  // nonzero only for '['; passed as `stop` to _increment_until
+{
     return ch == '[';
 }
 
-static int _is_open_paren_char(char ch) {
+static int
+_is_open_paren_char(char ch)  // nonzero only for '('; passed as `stop` to _increment_until
+{
     return ch == '(';
 }
 
@@ -733,23 +753,31 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
             expr_ty right = expr->v.BinOp.right;
             *left_anchor_lineno = left->end_lineno - 2;
             *left_anchor_col = left->end_col_offset;
-            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+            if (_next_valid_offset(
+                segment_lines, left_anchor_lineno, left_anchor_col
+            )) {
                 return 0;
             }
             // keep going until the current char is not whitespace or ')'
-            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_op_char)) {
+            if (_increment_until(
+                segment_lines, left_anchor_lineno, left_anchor_col, _is_op_char
+            )) {
                 return 0;
             }
-            // Check whether if this is a two-character operator (e.g. //)
             *right_anchor_lineno = *left_anchor_lineno;
             *right_anchor_col = *left_anchor_col + 1;
 
-            const char *segment_str = _get_segment_str(segment_lines, *left_anchor_lineno);
+            Py_ssize_t str_len = 0;
+            const char *segment_str = _get_segment_str(
+                segment_lines, *left_anchor_lineno, &str_len
+            );
             if (!segment_str) {
                 return 0;
             }
+
+            // Check whether this is a two-character operator (e.g. //)
             if (
-                *right_anchor_col < (Py_ssize_t) strlen(segment_str) &&
+                *right_anchor_col < str_len &&
                 (
                     // operator char should not be in the right subexpression
                     right->lineno - 2 > *right_anchor_lineno ||
@@ -769,10 +797,14 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
         case Subscript_kind: {
             *left_anchor_lineno = expr->v.Subscript.value->end_lineno - 2;
             *left_anchor_col = expr->v.Subscript.value->end_col_offset;
-            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+            if (_next_valid_offset(
+                segment_lines, left_anchor_lineno, left_anchor_col
+            )) {
                 return 0;
             }
-            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_open_bracket_char)) {
+            if (_increment_until(
+                segment_lines, left_anchor_lineno, left_anchor_col, _is_open_bracket_char
+            )) {
                 return 0;
             }
             *right_anchor_lineno = expr->end_lineno - 2;
@@ -786,10 +818,14 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
         case Call_kind:
             *left_anchor_lineno = expr->v.Call.func->end_lineno - 2;
             *left_anchor_col = expr->v.Call.func->end_col_offset;
-            if (_next_valid_offset(segment_lines, left_anchor_lineno, left_anchor_col)) {
+            if (_next_valid_offset(
+                segment_lines, left_anchor_lineno, left_anchor_col
+            )) {
                 return 0;
             }
-            if (_increment_until(segment_lines, left_anchor_lineno, left_anchor_col, _is_open_paren_char)) {
+            if (_increment_until(
+                segment_lines, left_anchor_lineno, left_anchor_col, _is_open_paren_char
+            )) {
                 return 0;
             }
             *right_anchor_lineno = expr->end_lineno - 2;
@@ -835,41 +871,48 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
     PyObject *segment_lines = NULL;
     PyObject *tmp;
 
-    segment = PyUnicode_FromString("\n");
+    segment = join_string_list("\n", lines);
     if (!segment) {
         goto done;
     }
 
-    tmp = PyUnicode_Join(segment, lines);
-    if (!tmp) {
-        goto done;
-    }
-    Py_SETREF(segment, tmp);
-
     // truncate segment
     Py_ssize_t num_lines = PyList_Size(lines);
     PyObject *last_string = PyList_GET_ITEM(lines, num_lines - 1);
     Py_ssize_t right_end_offset = PyUnicode_GET_LENGTH(last_string) - end_offset;
     Py_ssize_t end_join_offset = PyUnicode_GET_LENGTH(segment) - right_end_offset;
-    tmp = PyUnicode_Substring(segment, start_offset, PyUnicode_GET_LENGTH(segment) - end_join_offset);
+    tmp = PyUnicode_Substring(
+        segment, start_offset, PyUnicode_GET_LENGTH(segment) - end_join_offset
+    );
     if (!tmp) {
         goto done;
     }
     Py_SETREF(segment, tmp);
 
+    // same as `lines`, but first/last strings are truncated
     segment_lines = PyUnicode_Splitlines(segment, 0);
     if (!segment_lines) {
         goto done;
     }
 
     // segment = "(\n" + segment + "\n)"
-    tmp = PyUnicode_Concat(PyUnicode_FromString("(\n"), segment);
+    PyObject *paren_str = PyUnicode_FromString("(\n");
+    if (!paren_str) {
+        goto done;
+    }
+    tmp = PyUnicode_Concat(paren_str, segment);
+    Py_DECREF(paren_str);
     if (!tmp) {
         goto done;
     }
     Py_SETREF(segment, tmp);
 
-    tmp = PyUnicode_Concat(segment, PyUnicode_FromString("\n)"));
+    paren_str = PyUnicode_FromString("\n)");
+    if (!paren_str) {
+        goto done;
+    }
+    tmp = PyUnicode_Concat(segment, paren_str);
+    Py_DECREF(paren_str);
     if (!tmp) {
         goto done;
     }
@@ -916,8 +959,12 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
         assert(*left_anchor_col >= 0);
         assert(*right_anchor_lineno >= 0);
         assert(*right_anchor_col >= 0);
-        *left_anchor_col = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(segment_lines, *left_anchor_lineno), *left_anchor_col);
-        *right_anchor_col = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(segment_lines, *right_anchor_lineno), *right_anchor_col);
+        *left_anchor_col = _PyPegen_byte_offset_to_character_offset(
+            PyList_GET_ITEM(segment_lines, *left_anchor_lineno), *left_anchor_col
+        );
+        *right_anchor_col = _PyPegen_byte_offset_to_character_offset(
+            PyList_GET_ITEM(segment_lines, *right_anchor_lineno), *right_anchor_col
+        );
         if (*left_anchor_lineno == 0) {
             *left_anchor_col += start_offset;
         }
@@ -958,10 +1005,13 @@ compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize
     if (!carets) {
         goto error;
     }
-    int special_chars = (left_end_offset != -1 || right_start_offset != -1);
-    for (Py_ssize_t i = 0; i < num_lines; i++) {
-        PyObject *line = PyList_GET_ITEM(lines, i);
-        Py_ssize_t len = (i == num_lines - 1) ? end_offset : PyUnicode_GET_LENGTH(line);
+    int special_chars = (
+        left_end_lineno != -1 && left_end_offset != -1 &&
+        right_start_lineno != -1 && right_start_offset != -1
+    );
+    for (Py_ssize_t lineno = 0; lineno < num_lines; lineno++) {
+        PyObject *line = PyList_GET_ITEM(lines, lineno);
+        Py_ssize_t len = (lineno == num_lines - 1) ? end_offset : PyUnicode_GET_LENGTH(line);
         caret_line = PyList_New(len);
         if (!caret_line) {
             goto error;
@@ -969,29 +1019,40 @@ compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize
         int kind = PyUnicode_KIND(line);
         const void *data = PyUnicode_DATA(line);
         bool has_non_ws = 0;
-        for (Py_ssize_t j = 0; j < len; ++j) {
-            const char *ch = " ";
+        for (Py_ssize_t col = 0; col < len; col++) {
+            const char *ch = primary;
             if (!has_non_ws) {
-                Py_UCS4 ch = PyUnicode_READ(kind, data, j);
+                Py_UCS4 ch = PyUnicode_READ(kind, data, col);
                 if (!IS_WHITESPACE(ch)) {
                     has_non_ws = 1;
                 }
             }
-            if (has_non_ws && (i > 0 || j >= left_end_offset) && (i < num_lines - 1 || j < right_start_offset)) {
-                ch = primary;
-            }
-            if (special_chars && left_end_lineno <= i && i <= right_start_lineno) {
-                if ((left_end_lineno < i || left_end_offset <= j) && (i < right_start_lineno || j < right_start_offset)) {
-                    ch = secondary;
-                }
-            }
+            if (
+                !has_non_ws ||
+                (lineno == 0 && col < start_offset) ||
+                (lineno == num_lines - 1 && col >= end_offset)
+            ) {
+                ch = " ";
+            } else if (
+                special_chars &&
+                (lineno > left_end_lineno || col >= left_end_offset) &&
+                (lineno < right_start_lineno || col < right_start_offset)
+            ) {
+                ch = secondary;
+            } // else ch = primary
+
             PyObject *str = PyUnicode_FromString(ch);
             if (!str) {
                 goto error;
             }
-            PyList_SET_ITEM(caret_line, j, str);
+            PyList_SET_ITEM(caret_line, col, str);
+        }
+        PyObject *caret_line_str = join_string_list("", caret_line);
+        if (!caret_line_str) {
+            goto error;
         }
-        PyList_SET_ITEM(carets, i, caret_line);
+        Py_DECREF(caret_line);
+        PyList_SET_ITEM(carets, lineno, caret_line_str);
     }
     return carets;
 error:
@@ -1000,8 +1061,21 @@ compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize
     return NULL;
 }
 
+static int
+_is_all_whitespace(PyObject *line)
+{
+    int kind = PyUnicode_KIND(line);
+    const void *data = PyUnicode_DATA(line);
+    for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(line); i++) {
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+        if (!IS_WHITESPACE(ch))
+            return 0;
+    }
+    return 1;
+}
+
 // C implementation of textwrap.dedent.
-// Returns a new reference to the dedented string, NULL on failure.
+// Returns a new reference to a list of dedented lines, NULL on failure.
 // Sets `truncation` to the number of characters truncated.
 // In abnormal cases (errors, whitespace-only input), `truncation` is set to 0.
 static PyObject*
@@ -1015,23 +1089,12 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
     Py_ssize_t num_lines = PyList_Size(split);
     assert(num_lines > 0);
     for (Py_ssize_t i = 0; i < num_lines; i++) {
-        PyObject* line = PyList_GET_ITEM(split, i);
-        int kind = PyUnicode_KIND(line);
-        const void *data = PyUnicode_DATA(line);
-        bool has_non_ws = 0;
-        for (Py_ssize_t j = 0; j < PyUnicode_GET_LENGTH(line); j++) {
-            Py_UCS4 ch = PyUnicode_READ(kind, data, j);
-            if (!IS_WHITESPACE(ch)) {
-                has_non_ws = 1;
-                break;
-            }
-        }
-        if (!has_non_ws) {
+        if (_is_all_whitespace(PyList_GET_ITEM(split, i))) {
             PyObject *empty = PyUnicode_FromString("");
             if (!empty) {
                 goto error;
             }
-            PyList_SET_ITEM(split, i, empty);
+            PyList_SetItem(split, i, empty);
         }
     }
 
@@ -1065,6 +1128,9 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
             if (PyUnicode_GET_LENGTH(line) == 0) {
                 continue;
             }
+            // col >= len(line) implies the line is whitespace,
+            // which cannot happen since we replaced whitespace lines
+            // with empty strings.
             assert(col < PyUnicode_GET_LENGTH(line));
             Py_UCS4 ch = PyUnicode_READ_CHAR(line, col);
             if (ch != ref_ch) {
@@ -1074,6 +1140,7 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
     }
 dedent_compute_end:
 
+    *truncation = col;
     // truncate strings
     if (col == 0) {
         goto done;
@@ -1089,7 +1156,7 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
         if (!truncated_line) {
             goto error;
         }
-        PyList_SET_ITEM(split, i, truncated_line);
+        PyList_SetItem(split, i, truncated_line);
     }
 
 done:
@@ -1099,19 +1166,6 @@ dedent(PyObject *lines, Py_ssize_t *truncation) {
     return NULL;
 }
 
-static int
-_is_all_whitespace(PyObject *line)
-{
-    int kind = PyUnicode_KIND(line);
-    const void *data = PyUnicode_DATA(line);
-    for (Py_ssize_t i = 0; i < PyUnicode_GET_LENGTH(line); i++) {
-        Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        if (!IS_WHITESPACE(ch))
-            return 0;
-    }
-    return 1;
-}
-
 static int
 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
                PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
@@ -1148,7 +1202,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
                               &end_line, &end_col_byte_offset)) {
         start_line = end_line = lineno;
-        start_col_byte_offset = -1;
+        start_col_byte_offset = end_col_byte_offset = -1;
     }
 
     if (start_line < 0) {
@@ -1160,7 +1214,6 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
 
     PyObject* lines_original = NULL;
     PyObject* lines = NULL;
-    PyObject* lines_split = NULL;
     PyObject* carets = NULL;
     Py_ssize_t num_lines = 0;
     int rc = get_source_lines(filename, start_line, end_line, &lines_original);
@@ -1175,11 +1228,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     if (!lines) {
         goto done;
     }
-    lines_split = PyUnicode_Splitlines(lines, 0);
-    if (!lines_split) {
-        goto done;
-    }
-    num_lines = PyList_Size(lines_split);
+    num_lines = PyList_Size(lines);
 
     if (start_col_byte_offset < 0
         || end_col_byte_offset < 0)
@@ -1206,13 +1255,17 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // spans the whole line.
 
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
-    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(lines_split, 0), start_col_byte_offset);
+    Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(
+        PyList_GET_ITEM(lines, 0), start_col_byte_offset
+    );
     if (start_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
     }
 
-    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(PyList_GET_ITEM(lines_split, num_lines - 1), end_col_byte_offset);
+    Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(
+        PyList_GET_ITEM(lines, num_lines - 1), end_col_byte_offset
+    );
     if (end_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;
@@ -1230,54 +1283,57 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     char *primary_error_char = "^";
     char *secondary_error_char = primary_error_char;
 
-    res = extract_anchors_from_line(filename, lines_split, start_offset, end_offset,
-                                        &left_end_lineno, &right_start_lineno,
-                                        &left_end_offset, &right_start_offset,
-                                        &primary_error_char, &secondary_error_char);
+    res = extract_anchors_from_line(filename, lines, start_offset, end_offset,
+                                    &left_end_lineno, &right_start_lineno,
+                                    &left_end_offset, &right_start_offset,
+                                    &primary_error_char, &secondary_error_char);
     if (res < 0 && ignore_source_errors() < 0) {
         goto done;
     }
 
-    // Elide indicators if primary char spans the frame line
     if (res == 0) {
-        PyObject *tmp = PyUnicode_Substring(PyList_GET_ITEM(lines_split, 0), 0, start_offset);
+        // Elide indicators if primary char spans the frame line
+        PyObject *tmp = PyUnicode_Substring(PyList_GET_ITEM(lines, 0), 0, start_offset);
         int before_start_empty = tmp && _is_all_whitespace(tmp);
         Py_XDECREF(tmp);
-        PyObject *last_line = PyList_GET_ITEM(lines_split, num_lines - 1);
+        PyObject *last_line = PyList_GET_ITEM(lines, num_lines - 1);
         tmp = PyUnicode_Substring(last_line, end_offset, PyUnicode_GET_LENGTH(last_line));
         int after_end_empty = tmp && _is_all_whitespace(tmp);
         Py_XDECREF(tmp);
         if (before_start_empty && after_end_empty) {
             goto done;
         }
+        // clear anchor fields
+        left_end_lineno = left_end_offset = right_start_lineno = right_start_offset = -1;
     }
 
-    carets = compute_error_location_carets(lines_split, start_offset, end_offset,
-                                            left_end_lineno, right_start_lineno,
-                                            left_end_offset, right_start_offset,
-                                            primary_error_char, secondary_error_char);
+    carets = compute_error_location_carets(lines, start_offset, end_offset,
+                                           left_end_lineno, right_start_lineno,
+                                           left_end_offset, right_start_offset,
+                                           primary_error_char, secondary_error_char);
 
 done:
-    if (lines_split != NULL) {
+    if (lines != NULL) {
         Py_ssize_t num_carets = 0;
         if (carets != NULL) {
             num_carets = PyList_Size(carets);
         }
-        for (Py_ssize_t i = 0; i < num_lines; ++i) {
-            PyObject* line = PyList_GET_ITEM(lines_split, i);
-            if (_write_line_with_margin_and_indent(f, line, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin)) {
+        for (Py_ssize_t lineno = 0; lineno < num_lines; lineno++) {
+            PyObject* line = PyList_GET_ITEM(lines, lineno);
+            if (_write_line_with_margin_and_indent(
+                f, line, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+            )) {
                 break;
             }
-            if (i < num_carets) {
-                if (_write_line_with_margin_and_indent(f, PyList_GET_ITEM(carets, i), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin)) {
-                    break;
-                }
+            if (lineno < num_carets && _write_line_with_margin_and_indent(
+                f, PyList_GET_ITEM(carets, lineno), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+            )) {
+                break;
             }
         }
     }
     Py_XDECREF(lines_original);
     Py_XDECREF(lines);
-    Py_XDECREF(lines_split);
     Py_XDECREF(carets);
     return err;
 }

From 91fcb73b14ca0a9c5337bf89ff7ec80be9e72802 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Mon, 18 Sep 2023 11:33:23 -0700
Subject: [PATCH 07/14] fix issues - get tests passing

---
 Lib/test/test_traceback.py |  8 +++-----
 Lib/traceback.py           | 20 ++++++++++----------
 Python/traceback.c         | 37 +++++++++++++++++++++++--------------
 3 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index bc66215eedb4ae..6c1d8a3a7c6210 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -473,8 +473,6 @@ def foo(a: THIS_DOES_NOT_EXIST ) -> int:
         self.assertEqual(result_lines, expected_f.splitlines())
 
     def test_caret_multiline_expression(self):
-        # Make sure no carets are printed for expressions spanning multiple
-        # lines.
         def f_with_multiline():
             if True: raise ValueError(
                 "error over multiple lines"
@@ -534,8 +532,6 @@ def f_with_multiline():
         self.assertEqual(result_lines, expected_f.splitlines())
 
     def test_caret_multiline_expression_bin_op(self):
-        # Make sure no carets are printed for expressions spanning multiple
-        # lines.
         def f_with_multiline():
             return (
                 2 + 1 /
@@ -622,6 +618,7 @@ def f_with_binary_operator():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
             '    return ( a   )   + b\n'
             '           ~~~~~~~~~~^~~\n'
@@ -676,6 +673,7 @@ def f_with_binary_operator():
             'Traceback (most recent call last):\n'
             f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
             '    callable()\n'
+            '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
             '    return b     [    a  ] + c\n'
             '           ~~~~~~^^^^^^^^^\n'
@@ -1040,7 +1038,7 @@ def check_traceback_format(self, cleanup_func=None):
         # Make sure that the traceback is properly indented.
         tb_lines = python_fmt.splitlines()
         banner = tb_lines[0]
-        self.assertEqual(len(tb_lines), 5)
+        self.assertEqual(len(tb_lines), 6)
         location, source_line = tb_lines[-2], tb_lines[-1]
         self.assertTrue(banner.startswith('Traceback'))
         self.assertTrue(location.startswith('  File'))
diff --git a/Lib/traceback.py b/Lib/traceback.py
index dd19cfff208fd0..4019a62ced3ccc 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -279,6 +279,9 @@ def __init__(self, filename, lineno, name, *, lookup_line=True,
         """
         self.filename = filename
         self.lineno = lineno
+        self.end_lineno = end_lineno
+        self.colno = colno
+        self.end_colno = end_colno
         self.name = name
         self._line = line
         self._line_dedented = None
@@ -286,9 +289,6 @@ def __init__(self, filename, lineno, name, *, lookup_line=True,
             self.line
         self.locals = {k: _safe_string(v, 'local', func=repr)
             for k, v in locals.items()} if locals else None
-        self.end_lineno = end_lineno
-        self.colno = colno
-        self.end_colno = end_colno
 
     def __eq__(self, other):
         if isinstance(other, FrameSummary):
@@ -539,16 +539,16 @@ def format_frame_summary(self, frame_summary):
                         num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip())
                         caret_line = []
                         for col in range(len(all_lines[lineno])):
-                            if col < num_spaces or (
-                                lineno == 0 and col < start_offset
-                            ) or (
-                                lineno == len(all_lines) - 1 and col >= end_offset
-                            ):
+                            if lineno == len(all_lines) - 1 and col >= end_offset:
+                                break
+                            elif col < num_spaces or (lineno == 0 and col < start_offset):
                                 caret_line.append(' ')
                             elif anchors and (
-                                lineno > anchors.left_end_lineno or col >= anchors_left_end_offset
+                                lineno > anchors.left_end_lineno or
+                                (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset)
                             ) and (
-                                lineno < anchors.right_start_lineno or col < anchors_right_start_offset
+                                lineno < anchors.right_start_lineno or
+                                (lineno == anchors.right_start_lineno and col < anchors_right_start_offset)
                             ):
                                 caret_line.append(secondary_char)
                             else:
diff --git a/Python/traceback.c b/Python/traceback.c
index 519f32534544be..d93807b5489f7c 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -516,7 +516,7 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
         lineobj = PyFile_GetLine(fob, -1);
         if (i >= lineno) {
             if (!lineobj || !PyUnicode_Check(lineobj)) {
-                Py_XSETREF(lineobj, PyUnicode_FromString("\n"));
+                Py_XSETREF(lineobj, PyUnicode_FromString(""));
                 if (!lineobj) {
                     goto cleanup_fob;
                 }
@@ -524,7 +524,7 @@ get_source_lines(PyObject *filename, int lineno, int end_lineno, PyObject **line
             PyList_SET_ITEM(lines_accum, i - lineno, lineobj);
         }
     }
-    *lines = join_string_list("", lines_accum);
+    *lines = join_string_list("\n", lines_accum);
 cleanup_fob:
     Py_XDECREF(lines_accum);
     PyErr_Clear();
@@ -879,10 +879,10 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
     // truncate segment
     Py_ssize_t num_lines = PyList_Size(lines);
     PyObject *last_string = PyList_GET_ITEM(lines, num_lines - 1);
-    Py_ssize_t right_end_offset = PyUnicode_GET_LENGTH(last_string) - end_offset;
-    Py_ssize_t end_join_offset = PyUnicode_GET_LENGTH(segment) - right_end_offset;
+    Py_ssize_t offset_from_right = PyUnicode_GET_LENGTH(last_string) - end_offset;
+    Py_ssize_t join_end_offset = PyUnicode_GET_LENGTH(segment) - offset_from_right;
     tmp = PyUnicode_Substring(
-        segment, start_offset, PyUnicode_GET_LENGTH(segment) - end_join_offset
+        segment, start_offset, join_end_offset
     );
     if (!tmp) {
         goto done;
@@ -933,6 +933,10 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
     mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
                                             &flags, arena);
     if (!module) {
+        if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            PyErr_Clear();
+            res = 0;
+        }
         goto done;
     }
     if (!_PyAST_Optimize(module, arena, _Py_GetConfig()->optimization_level, 0)) {
@@ -1027,16 +1031,14 @@ compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize
                     has_non_ws = 1;
                 }
             }
-            if (
-                !has_non_ws ||
-                (lineno == 0 && col < start_offset) ||
-                (lineno == num_lines - 1 && col >= end_offset)
-            ) {
+            if (lineno == num_lines - 1 && col >= end_offset) {
+                break;
+            } else if (!has_non_ws || (lineno == 0 && col < start_offset)) {
                 ch = " ";
             } else if (
                 special_chars &&
-                (lineno > left_end_lineno || col >= left_end_offset) &&
-                (lineno < right_start_lineno || col < right_start_offset)
+                (lineno > left_end_lineno || (lineno == left_end_lineno && col >= left_end_offset)) &&
+                (lineno < right_start_lineno || (lineno == right_start_lineno && col < right_start_offset))
             ) {
                 ch = secondary;
             } // else ch = primary
@@ -1255,17 +1257,24 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // spans the whole line.
 
     // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
+    PyObject *lines_original_split = PyUnicode_Splitlines(lines_original, 0);
+    if (!lines_original_split) {
+        goto done;
+    }
+    assert(PyList_Size(lines_original_split) == num_lines);
     Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(
-        PyList_GET_ITEM(lines, 0), start_col_byte_offset
+        PyList_GET_ITEM(lines_original_split, 0), start_col_byte_offset
     );
     if (start_offset < 0) {
         err = ignore_source_errors() < 0;
+        Py_DECREF(lines_original_split);
         goto done;
     }
 
     Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(
-        PyList_GET_ITEM(lines, num_lines - 1), end_col_byte_offset
+        PyList_GET_ITEM(lines_original_split, num_lines - 1), end_col_byte_offset
     );
+    Py_DECREF(lines_original_split);
     if (end_offset < 0) {
         err = ignore_source_errors() < 0;
         goto done;

From d155d35ed18e6505ac63a9fa1deee4b9987bff1c Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Mon, 18 Sep 2023 12:00:33 -0700
Subject: [PATCH 08/14] add function call traceback tests

---
 Lib/test/test_traceback.py | 73 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 6c1d8a3a7c6210..9bfa20101b9696 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -611,7 +611,7 @@ def test_caret_for_binary_operators_with_spaces_and_parenthesis(self):
         def f_with_binary_operator():
             a = 1
             b = ""
-            return ( a   )   + b
+            return ( a   )   +b
 
         lineno_f = f_with_binary_operator.__code__.co_firstlineno
         expected_error = (
@@ -620,8 +620,8 @@ def f_with_binary_operator():
             '    callable()\n'
             '    ~~~~~~~~^^\n'
             f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
-            '    return ( a   )   + b\n'
-            '           ~~~~~~~~~~^~~\n'
+            '    return ( a   )   +b\n'
+            '           ~~~~~~~~~~^~\n'
         )
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
@@ -681,6 +681,73 @@ def f_with_binary_operator():
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_call(self):
+        def f_with_call():
+            def f1(a):
+                def f2(b):
+                    raise RuntimeError("fail")
+                return f2
+            return f1("x")("y")
+
+        lineno_f = f_with_call.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+5}, in f_with_call\n'
+            '    return f1("x")("y")\n'
+            '           ~~~~~~~^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+3}, in f2\n'
+            '    raise RuntimeError("fail")\n'
+        )
+        result_lines = self.get_exception(f_with_call)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_call_unicode(self):
+        def f_with_call():
+            def f1(a):
+                def f2(b):
+                    raise RuntimeError("fail")
+                return f2
+            return f1("ó")("á")
+
+        lineno_f = f_with_call.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+5}, in f_with_call\n'
+            '    return f1("ó")("á")\n'
+            '           ~~~~~~~^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+3}, in f2\n'
+            '    raise RuntimeError("fail")\n'
+        )
+        result_lines = self.get_exception(f_with_call)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_call_with_spaces_and_parenthesis(self):
+        def f_with_binary_operator():
+            def f(a):
+                raise RuntimeError("fail")
+            return f     (    "x"  ) + 2
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
+            '    return f     (    "x"  ) + 2\n'
+            '           ~~~~~~^^^^^^^^^^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f\n'
+            '    raise RuntimeError("fail")\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_traceback_specialization_with_syntax_error(self):
         bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 

From 7bee8ba564eef9dabc378a1277b07897a9aca977 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Mon, 18 Sep 2023 13:52:40 -0700
Subject: [PATCH 09/14] add more tests

---
 Lib/test/test_traceback.py | 160 +++++++++++++++++++++++++++++++++++++
 Lib/traceback.py           |   4 +-
 2 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 9bfa20101b9696..df08a87581cf61 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -626,6 +626,97 @@ def f_with_binary_operator():
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_binary_operators_multiline(self):
+        def f_with_binary_operator():
+            b = 1
+            c = ""
+            a = b    \
+         +\
+               c  # test
+            return a
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
+            '       a = b    \\\n'
+            '           ~~~~~~\n'
+            '    +\\\n'
+            '    ^~\n'
+            '          c  # test\n'
+            '          ~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_binary_operators_multiline_two_char(self):
+        def f_with_binary_operator():
+            b = 1
+            c = ""
+            a = (
+                (b  # test +
+                    )  \
+                # +
+            << (
+
+                c  # test
+                \
+            )  # test
+            )
+            return a
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+4}, in f_with_binary_operator\n'
+            '        (b  # test +\n'
+            '        ~~~~~~~~~~~~\n'
+            '            )  \\\n'
+            '            ~~~~\n'
+            '        # +\n'
+            '        ~~~\n'
+            '    << (\n'
+            '    ^^~~\n'
+            '    \n'
+            '    \n'
+            '        c  # test\n'
+            '        ~~~~~~~~~\n'
+            '        \\\n'
+            '        ~\n'
+            '    )  # test\n'
+            '    ~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_caret_for_binary_operators_multiline_with_unicode(self):
+        def f_with_binary_operator():
+            b = 1
+            a = ("ááá" +
+                "áá") + b
+            return a
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f_with_binary_operator\n'
+            '    a = ("ááá" +\n'
+            '        ~~~~~~~~\n'
+            '        "áá") + b\n'
+            '        ~~~~~~^~~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_caret_for_subscript(self):
         def f_with_subscript():
             some_dict = {'x': {'y': None}}
@@ -681,6 +772,42 @@ def f_with_binary_operator():
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_subscript_multiline(self):
+        def f_with_subscript():
+            bbbbb = {}
+            ccc = 1
+            ddd = 2
+            b = bbbbb \
+                [  ccc # test
+
+                 + ddd  \
+
+                ] # test
+            return b
+
+        lineno_f = f_with_subscript.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+4}, in f_with_subscript\n'
+            '    b = bbbbb \\\n'
+            '        ~~~~~~~\n'
+            '        [  ccc # test\n'
+            '        ^^^^^^^^^^^^^\n'
+            '    \n'
+            '    \n'
+            '         + ddd  \\\n'
+            '         ^^^^^^^^\n'
+            '    \n'
+            '    \n'
+            '        ] # test\n'
+            '        ^\n'
+        )
+        result_lines = self.get_exception(f_with_subscript)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_caret_for_call(self):
         def f_with_call():
             def f1(a):
@@ -748,6 +875,39 @@ def f(a):
         result_lines = self.get_exception(f_with_binary_operator)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_caret_for_call_multiline(self):
+        def f_with_call():
+            class C:
+                def y(self, a):
+                    def f(b):
+                        raise RuntimeError("fail")
+                    return f
+            def g(x):
+                return C()
+            a = (g(1).y)(
+                2
+            )(3)(4)
+            return a
+
+        lineno_f = f_with_call.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+8}, in f_with_call\n'
+            '    a = (g(1).y)(\n'
+            '        ~~~~~~~~~\n'
+            '        2\n'
+            '        ~\n'
+            '    )(3)(4)\n'
+            '    ~^^^\n'
+            f'  File "{__file__}", line {lineno_f+4}, in f\n'
+            '    raise RuntimeError("fail")\n'
+        )
+        result_lines = self.get_exception(f_with_call)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_traceback_specialization_with_syntax_error(self):
         bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 4019a62ced3ccc..455599e9f69bd7 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -561,7 +561,9 @@ def format_frame_summary(self, frame_summary):
                     result.append(all_lines[i] + "\n")
                     if carets is not None:
                         result.append(carets[i] + "\n")
-                row.append(textwrap.indent(textwrap.dedent("".join(result)), '    '))
+                row.append(
+                    textwrap.indent(textwrap.dedent("".join(result)), '    ', lambda line: True)
+                )
 
         if frame_summary.locals:
             for name, value in sorted(frame_summary.locals.items()):

From 34a5755ec54415126eb8638065eb001c1f22f692 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Tue, 19 Sep 2023 14:57:08 -0700
Subject: [PATCH 10/14] make outputs abbreviated, only output 1 line when
 location information is missing

---
 Lib/test/test_traceback.py |  96 ++++++++++++--
 Lib/traceback.py           | 116 ++++++++++-------
 Python/traceback.c         | 261 +++++++++++++++++++++++++------------
 3 files changed, 334 insertions(+), 139 deletions(-)

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index df08a87581cf61..416d2f33e350e0 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -660,9 +660,7 @@ def f_with_binary_operator():
                 (b  # test +
                     )  \
                 # +
-            << (
-
-                c  # test
+            << (c  # test
                 \
             )  # test
             )
@@ -681,12 +679,8 @@ def f_with_binary_operator():
             '            ~~~~\n'
             '        # +\n'
             '        ~~~\n'
-            '    << (\n'
-            '    ^^~~\n'
-            '    \n'
-            '    \n'
-            '        c  # test\n'
-            '        ~~~~~~~~~\n'
+            '    << (c  # test\n'
+            '    ^^~~~~~~~~~~~\n'
             '        \\\n'
             '        ~\n'
             '    )  # test\n'
@@ -908,6 +902,90 @@ def g(x):
         result_lines = self.get_exception(f_with_call)
         self.assertEqual(result_lines, expected_error.splitlines())
 
+    def test_many_lines(self):
+        def f():
+            x = 1
+            if True: x += (
+                "a" +
+                "a"
+            )  # test
+
+        lineno_f = f.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f\n'
+            '    if True: x += (\n'
+            '             ^^^^^^\n'
+            '    ...<2 lines>...\n'
+            '    )  # test\n'
+            '    ^\n'
+        )
+        result_lines = self.get_exception(f)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_many_lines_no_caret(self):
+        def f():
+            x = 1
+            x += (
+                "a" +
+                "a"
+            )
+
+        lineno_f = f.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+2}, in f\n'
+            '    x += (\n'
+            '    ...<2 lines>...\n'
+            '    )\n'
+        )
+        result_lines = self.get_exception(f)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
+    def test_many_lines_binary_op(self):
+        def f_with_binary_operator():
+            b = 1
+            c = "a"
+            a = (
+                b +
+                b
+            ) + (
+                c +
+                c +
+                c
+            )
+            return a
+
+        lineno_f = f_with_binary_operator.__code__.co_firstlineno
+        expected_error = (
+            'Traceback (most recent call last):\n'
+            f'  File "{__file__}", line {self.callable_line}, in get_exception\n'
+            '    callable()\n'
+            '    ~~~~~~~~^^\n'
+            f'  File "{__file__}", line {lineno_f+3}, in f_with_binary_operator\n'
+            '    a = (\n'
+            '        ~\n'
+            '        b +\n'
+            '        ~~~\n'
+            '        b\n'
+            '        ~\n'
+            '    ) + (\n'
+            '    ~~^~~\n'
+            '        c +\n'
+            '        ~~~\n'
+            '    ...<2 lines>...\n'
+            '    )\n'
+            '    ~\n'
+        )
+        result_lines = self.get_exception(f_with_binary_operator)
+        self.assertEqual(result_lines, expected_error.splitlines())
+
     def test_traceback_specialization_with_syntax_error(self):
         bytecode = compile("1 / 0 / 1 / 2\n", TESTFN, "exec")
 
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 455599e9f69bd7..816b021c28370a 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -279,7 +279,7 @@ def __init__(self, filename, lineno, name, *, lookup_line=True,
         """
         self.filename = filename
         self.lineno = lineno
-        self.end_lineno = end_lineno
+        self.end_lineno = lineno if end_lineno is None else end_lineno
         self.colno = colno
         self.end_colno = end_colno
         self.name = name
@@ -478,15 +478,17 @@ def format_frame_summary(self, frame_summary):
             frame_summary.filename, frame_summary.lineno, frame_summary.name))
         if frame_summary.line:
             if (
-                frame_summary.end_lineno is None or
                 frame_summary.colno is None or
                 frame_summary.end_colno is None
             ):
-                row.append(textwrap.indent(frame_summary.line, '    ') + "\n")
+                # only output first line
+                row.append(textwrap.indent(frame_summary.line.partition('\n')[0], '    ') + "\n")
             else:
                 all_lines_original = frame_summary._original_line.splitlines()[
                     :frame_summary.end_lineno - frame_summary.lineno + 1
                 ]
+                colno = 0 if frame_summary.colno is None else frame_summary.colno
+                end_colno = len(all_lines_original[-1]) if frame_summary.end_colno is None else frame_summary.end_colno
                 # character index of the start of the instruction
                 start_offset = _byte_offset_to_character_offset(
                     all_lines_original[0], frame_summary.colno
@@ -515,52 +517,76 @@ def format_frame_summary(self, frame_summary):
                 except AssertionError:
                     pass
 
-                carets = None
+                show_carets = False
                 # only use carets if there are anchors or the carets do not span all lines
                 if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip():
-                    carets = []
-
-                    anchors_left_end_offset = 0
-                    anchors_right_start_offset = 0
-                    primary_char = "^"
-                    secondary_char = "^"
-                    if anchors:
-                        anchors_left_end_offset = anchors.left_end_offset
-                        anchors_right_start_offset = anchors.right_start_offset
-                        # anchor positions do not take start_offset into account
-                        if anchors.left_end_lineno == 0:
-                            anchors_left_end_offset += start_offset
-                        if anchors.right_start_lineno == 0:
-                            anchors_right_start_offset += start_offset
-                        primary_char = anchors.primary_char
-                        secondary_char = anchors.secondary_char
-
-                    for lineno in range(len(all_lines)):
-                        num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip())
-                        caret_line = []
-                        for col in range(len(all_lines[lineno])):
-                            if lineno == len(all_lines) - 1 and col >= end_offset:
-                                break
-                            elif col < num_spaces or (lineno == 0 and col < start_offset):
-                                caret_line.append(' ')
-                            elif anchors and (
-                                lineno > anchors.left_end_lineno or
-                                (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset)
-                            ) and (
-                                lineno < anchors.right_start_lineno or
-                                (lineno == anchors.right_start_lineno and col < anchors_right_start_offset)
-                            ):
-                                caret_line.append(secondary_char)
-                            else:
-                                caret_line.append(primary_char)
-
-                        carets.append("".join(caret_line))
+                    show_carets = True
 
                 result = []
-                for i in range(len(all_lines)):
-                    result.append(all_lines[i] + "\n")
-                    if carets is not None:
-                        result.append(carets[i] + "\n")
+
+                # only display first line, last line, and lines around anchor start/end
+                significant_lines = {0, len(all_lines) - 1}
+
+                anchors_left_end_offset = 0
+                anchors_right_start_offset = 0
+                primary_char = "^"
+                secondary_char = "^"
+                if anchors:
+                    anchors_left_end_offset = anchors.left_end_offset
+                    anchors_right_start_offset = anchors.right_start_offset
+                    # anchor positions do not take start_offset into account
+                    if anchors.left_end_lineno == 0:
+                        anchors_left_end_offset += start_offset
+                    if anchors.right_start_lineno == 0:
+                        anchors_right_start_offset += start_offset
+                    primary_char = anchors.primary_char
+                    secondary_char = anchors.secondary_char
+                    significant_lines.update(
+                        range(anchors.left_end_lineno - 1, anchors.left_end_lineno + 2)
+                    )
+                    significant_lines.update(
+                        range(anchors.right_start_lineno - 1, anchors.right_start_lineno + 2)
+                    )
+
+                significant_lines.discard(-1)
+                significant_lines.discard(len(all_lines))
+
+                def output_line(lineno):
+                    result.append(all_lines[lineno] + "\n")
+                    if not show_carets:
+                        return
+                    num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip())
+                    carets = []
+                    for col in range(len(all_lines[lineno])):
+                        if lineno == len(all_lines) - 1 and col >= end_offset:
+                            break
+                        elif col < num_spaces or (lineno == 0 and col < start_offset):
+                            carets.append(' ')
+                        elif anchors and (
+                            lineno > anchors.left_end_lineno or
+                            (lineno == anchors.left_end_lineno and col >= anchors_left_end_offset)
+                        ) and (
+                            lineno < anchors.right_start_lineno or
+                            (lineno == anchors.right_start_lineno and col < anchors_right_start_offset)
+                        ):
+                            carets.append(secondary_char)
+                        else:
+                            carets.append(primary_char)
+                    result.append("".join(carets) + "\n")
+
+                # display significant lines
+                sig_lines_list = sorted(significant_lines)
+                for i, lineno in enumerate(sig_lines_list):
+                    if i:
+                        linediff = lineno - sig_lines_list[i - 1]
+                        if linediff == 2:
+                            # 1 line in between - just output it
+                            output_line(lineno - 1)
+                        elif linediff > 2:
+                            # > 1 line in between - abbreviate
+                            result.append(f"...<{linediff - 1} lines>...\n")
+                    output_line(lineno)
+
                 row.append(
                     textwrap.indent(textwrap.dedent("".join(result)), '    ', lambda line: True)
                 )
diff --git a/Python/traceback.c b/Python/traceback.c
index d93807b5489f7c..7e71d417f0306e 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -997,70 +997,109 @@ ignore_source_errors(void) {
     return 0;
 }
 
-static PyObject*
-compute_error_location_carets(PyObject *lines, Py_ssize_t start_offset, Py_ssize_t end_offset,
+// helper data structure to keep track of which lines to output
+typedef struct SignificantLines {
+    // we only add a maximum of 8 lines (first, last, and 3 around each of the 2 anchor lines)
+    Py_ssize_t lines[8];
+    size_t size;  // number of entries of `lines` currently in use
+} SignificantLines;
+
+static void significant_lines_init(SignificantLines *sl) {  // reset sl so that it holds no lines
+    sl->size = 0;  // the lines array itself is not cleared; size alone marks it empty
+}
+
+static void significant_lines_append(SignificantLines* sl, Py_ssize_t line, Py_ssize_t max_line)
+{   // record `line` in sl unless it lies outside the inclusive range [0, max_line]
+    if (line < 0 || line > max_line) {
+        return;  // out-of-range requests are silently dropped
+    }
+    assert(sl->size < 8);  // capacity of sl->lines; only checked when assertions are enabled
+    sl->lines[sl->size++] = line;
+}
+
+static int significant_lines_compare(const void *a, const void *b)
+{   // qsort comparator (ascending); compares rather than subtracts so no Py_ssize_t difference is narrowed to int
+    return (*(const Py_ssize_t *)a > *(const Py_ssize_t *)b) - (*(const Py_ssize_t *)a < *(const Py_ssize_t *)b);
+}
+
+// sort lines and remove duplicate lines
+static void significant_lines_process(SignificantLines *sl)
+{
+    qsort(sl->lines, sl->size, sizeof(Py_ssize_t), significant_lines_compare);  // ascending order
+    Py_ssize_t lines[8];  // scratch buffer with the same capacity as sl->lines
+    size_t idx = 0;  // number of unique values kept so far
+    for (size_t i = 0; i < sl->size; i++) {
+        if (i && sl->lines[i] == sl->lines[i - 1]) {
+            continue;  // after sorting, duplicates are adjacent
+        }
+        lines[idx++] = sl->lines[i];
+    }
+    memcpy(sl->lines, lines, idx * sizeof(Py_ssize_t));
+    sl->size = idx;
+}
+
+static int
+print_error_location_carets(PyObject *lines, Py_ssize_t lineno,
+                              Py_ssize_t start_offset, Py_ssize_t end_offset,
                               Py_ssize_t left_end_lineno, Py_ssize_t right_start_lineno,
                               Py_ssize_t left_end_offset, Py_ssize_t right_start_offset,
-                              const char *primary, const char *secondary)
+                              const char *primary, const char *secondary,
+                              PyObject *f, int indent, int margin_indent, const char *margin)
 {
     Py_ssize_t num_lines = PyList_Size(lines);
-    PyObject *carets = PyList_New(num_lines);
-    PyObject *caret_line = NULL;
-    if (!carets) {
-        goto error;
-    }
+    PyObject *line = PyList_GET_ITEM(lines, lineno);
     int special_chars = (
         left_end_lineno != -1 && left_end_offset != -1 &&
         right_start_lineno != -1 && right_start_offset != -1
     );
-    for (Py_ssize_t lineno = 0; lineno < num_lines; lineno++) {
-        PyObject *line = PyList_GET_ITEM(lines, lineno);
-        Py_ssize_t len = (lineno == num_lines - 1) ? end_offset : PyUnicode_GET_LENGTH(line);
-        caret_line = PyList_New(len);
-        if (!caret_line) {
-            goto error;
-        }
-        int kind = PyUnicode_KIND(line);
-        const void *data = PyUnicode_DATA(line);
-        bool has_non_ws = 0;
-        for (Py_ssize_t col = 0; col < len; col++) {
-            const char *ch = primary;
-            if (!has_non_ws) {
-                Py_UCS4 ch = PyUnicode_READ(kind, data, col);
-                if (!IS_WHITESPACE(ch)) {
-                    has_non_ws = 1;
-                }
-            }
-            if (lineno == num_lines - 1 && col >= end_offset) {
-                break;
-            } else if (!has_non_ws || (lineno == 0 && col < start_offset)) {
-                ch = " ";
-            } else if (
-                special_chars &&
-                (lineno > left_end_lineno || (lineno == left_end_lineno && col >= left_end_offset)) &&
-                (lineno < right_start_lineno || (lineno == right_start_lineno && col < right_start_offset))
-            ) {
-                ch = secondary;
-            } // else ch = primary
-
-            PyObject *str = PyUnicode_FromString(ch);
-            if (!str) {
-                goto error;
+    Py_ssize_t len = (lineno == num_lines - 1) ? end_offset : PyUnicode_GET_LENGTH(line);
+    PyObject *carets = PyList_New(len);
+    if (!carets) {
+        goto error;
+    }
+    int kind = PyUnicode_KIND(line);
+    const void *data = PyUnicode_DATA(line);
+    bool has_non_ws = 0;
+    for (Py_ssize_t col = 0; col < len; col++) {
+        const char *ch = primary;
+        if (!has_non_ws) {
+            Py_UCS4 ch = PyUnicode_READ(kind, data, col);
+            if (!IS_WHITESPACE(ch)) {
+                has_non_ws = 1;
             }
-            PyList_SET_ITEM(caret_line, col, str);
         }
-        PyObject *caret_line_str = join_string_list("", caret_line);
-        if (!caret_line_str) {
+        if (lineno == num_lines - 1 && col >= end_offset) {
+            break;
+        } else if (!has_non_ws || (lineno == 0 && col < start_offset)) {
+            ch = " ";
+        } else if (
+            special_chars &&
+            (lineno > left_end_lineno || (lineno == left_end_lineno && col >= left_end_offset)) &&
+            (lineno < right_start_lineno || (lineno == right_start_lineno && col < right_start_offset))
+        ) {
+            ch = secondary;
+        } // else ch = primary
+
+        PyObject *str = PyUnicode_FromString(ch);
+        if (!str) {
             goto error;
         }
-        Py_DECREF(caret_line);
-        PyList_SET_ITEM(carets, lineno, caret_line_str);
+        PyList_SET_ITEM(carets, col, str);
     }
-    return carets;
+    PyObject *caret_line_str = join_string_list("", carets);
+    if (!caret_line_str) {
+        goto error;
+    }
+    int res = _write_line_with_margin_and_indent(f, caret_line_str, indent, margin_indent, margin);
+    Py_DECREF(caret_line_str);
+    if (res) {
+        goto error;
+    }
+    Py_DECREF(carets);
+    return 0;
 error:
     Py_XDECREF(carets);
-    Py_XDECREF(caret_line);
-    return NULL;
+    return -1;
 }
 
 static int
@@ -1210,35 +1249,38 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     if (start_line < 0) {
         start_line = lineno;
     }
-    if (end_line < 0) {
+    // only fetch first line if location information is missing
+    if (end_line < 0 || start_col_byte_offset < 0 || end_col_byte_offset < 0) {
         end_line = lineno;
     }
 
     PyObject* lines_original = NULL;
     PyObject* lines = NULL;
-    PyObject* carets = NULL;
     Py_ssize_t num_lines = 0;
     int rc = get_source_lines(filename, start_line, end_line, &lines_original);
     if (rc || !lines_original) {
         /* ignore errors since we can't report them, can we? */
         err = ignore_source_errors();
-        goto done;
+        goto error;
     }
 
     Py_ssize_t truncation = 0;
     lines = dedent(lines_original, &truncation);
     if (!lines) {
-        goto done;
+        goto error;
     }
     num_lines = PyList_Size(lines);
 
-    if (start_col_byte_offset < 0
-        || end_col_byte_offset < 0)
-    {
+    // only output first line if no column location is given
+    if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
+        if (_write_line_with_margin_and_indent(
+            f, PyList_GET_ITEM(lines, 0), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+        )) {
+            goto error;
+        }
         goto done;
     }
 
-
     // When displaying errors, we will use the following generic structure:
     //
     //  ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
@@ -1256,19 +1298,20 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // To keep the column indicators pertinent, they are not shown when the primary character
     // spans the whole line.
 
-    // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
     PyObject *lines_original_split = PyUnicode_Splitlines(lines_original, 0);
     assert(PyList_Size(lines_original_split) == num_lines);
     if (!lines_original_split) {
-        goto done;
+        goto error;
     }
+
+    // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
     Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(
         PyList_GET_ITEM(lines_original_split, 0), start_col_byte_offset
     );
     if (start_offset < 0) {
         err = ignore_source_errors() < 0;
         Py_DECREF(lines_original_split);
-        goto done;
+        goto error;
     }
 
     Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(
@@ -1277,7 +1320,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     Py_DECREF(lines_original_split);
     if (end_offset < 0) {
         err = ignore_source_errors() < 0;
-        goto done;
+        goto error;
     }
 
     // adjust start/end offset based on dedent
@@ -1297,9 +1340,17 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
                                     &left_end_offset, &right_start_offset,
                                     &primary_error_char, &secondary_error_char);
     if (res < 0 && ignore_source_errors() < 0) {
-        goto done;
+        goto error;
     }
 
+    int show_carets = 1;
+
+    // only display significant lines: first line, last line, lines around anchor start/end
+    SignificantLines sl;
+    significant_lines_init(&sl);
+    significant_lines_append(&sl, 0, num_lines - 1);
+    significant_lines_append(&sl, num_lines - 1, num_lines - 1);
+
     if (res == 0) {
         // Elide indicators if primary char spans the frame line
         PyObject *tmp = PyUnicode_Substring(PyList_GET_ITEM(lines, 0), 0, start_offset);
@@ -1310,40 +1361,80 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
         int after_end_empty = tmp && _is_all_whitespace(tmp);
         Py_XDECREF(tmp);
         if (before_start_empty && after_end_empty) {
-            goto done;
+            show_carets = 0;
         }
         // clear anchor fields
         left_end_lineno = left_end_offset = right_start_lineno = right_start_offset = -1;
+    } else {
+        for (int i = -1; i <= 1; ++i) {
+            significant_lines_append(&sl, i + left_end_lineno, num_lines - 1);
+            significant_lines_append(&sl, i + right_start_lineno, num_lines - 1);
+        }
     }
 
-    carets = compute_error_location_carets(lines, start_offset, end_offset,
-                                           left_end_lineno, right_start_lineno,
-                                           left_end_offset, right_start_offset,
-                                           primary_error_char, secondary_error_char);
+    // sort and dedupe significant lines
+    significant_lines_process(&sl);
 
-done:
-    if (lines != NULL) {
-        Py_ssize_t num_carets = 0;
-        if (carets != NULL) {
-            num_carets = PyList_Size(carets);
-        }
-        for (Py_ssize_t lineno = 0; lineno < num_lines; lineno++) {
-            PyObject* line = PyList_GET_ITEM(lines, lineno);
-            if (_write_line_with_margin_and_indent(
-                f, line, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
-            )) {
-                break;
-            }
-            if (lineno < num_carets && _write_line_with_margin_and_indent(
-                f, PyList_GET_ITEM(carets, lineno), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
-            )) {
-                break;
+    for (size_t i = 0; i < sl.size; i++) {
+        if (i > 0) {
+            Py_ssize_t linediff = sl.lines[i] - sl.lines[i - 1];
+            if (linediff == 2) {
+                // only 1 line in between - just print it out
+                if (_write_line_with_margin_and_indent(
+                    f, PyList_GET_ITEM(lines, sl.lines[i] - 1), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+                )) {
+                    goto error;
+                }
+                if (show_carets && print_error_location_carets(
+                    lines, sl.lines[i] - 1,
+                    start_offset, end_offset,
+                    left_end_lineno, right_start_lineno,
+                    left_end_offset, right_start_offset,
+                    primary_error_char, secondary_error_char,
+                    f, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+                )) {
+                    goto error;
+                }
+            } else if (linediff > 2) {
+                // more than 1 line in between - abbreviate (linediff is Py_ssize_t, hence %zd rather than %d)
+                PyObject *abbrv_str = PyUnicode_FromFormat("...<%zd lines>...", linediff - 1);
+                if (!abbrv_str) {
+                    goto error;
+                }
+                int write_res = _write_line_with_margin_and_indent(
+                    f, abbrv_str, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+                );
+                Py_DECREF(abbrv_str);
+                if (write_res) {
+                    goto error;
+                }
             }
         }
+        // print the current line
+        if (_write_line_with_margin_and_indent(
+            f, PyList_GET_ITEM(lines, sl.lines[i]), _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+        )) {
+            goto error;
+        }
+        if (show_carets && print_error_location_carets(
+            lines, sl.lines[i],
+            start_offset, end_offset,
+            left_end_lineno, right_start_lineno,
+            left_end_offset, right_start_offset,
+            primary_error_char, secondary_error_char,
+            f, _TRACEBACK_SOURCE_LINE_INDENT, margin_indent, margin
+        )) {
+            goto error;
+        }
     }
+
+done:
+    Py_DECREF(lines_original);
+    Py_DECREF(lines);
+    return 0;
+error:
     Py_XDECREF(lines_original);
     Py_XDECREF(lines);
-    Py_XDECREF(carets);
     return err;
 }
 

From 2fcbea7da7da7a2fb7c0de126eadaa6320ab3b48 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Tue, 19 Sep 2023 15:05:35 -0700
Subject: [PATCH 11/14] fix other failing tests

---
 Lib/test/test_contextlib.py | 8 +++++++-
 Lib/test/test_doctest.py    | 3 +++
 Lib/test/test_exceptions.py | 3 ++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_contextlib.py b/Lib/test/test_contextlib.py
index dbc7dfcc24bf07..c9b47f733c1e06 100644
--- a/Lib/test/test_contextlib.py
+++ b/Lib/test/test_contextlib.py
@@ -802,11 +802,17 @@ def raise_exc(exc):
         self.assertIsInstance(exc, ValueError)
         ve_frames = traceback.extract_tb(exc.__traceback__)
         expected = \
-            [('test_exit_exception_traceback', 'with self.exit_stack() as stack:')] + \
+            [(
+                'test_exit_exception_traceback',
+                'with self.exit_stack() as stack:\n'
+                '    stack.callback(raise_exc, ValueError)\n'
+                '    1/0'
+            )] + \
             self.callback_error_internal_frames + \
             [('_exit_wrapper', 'callback(*args, **kwds)'),
              ('raise_exc', 'raise exc')]
 
+        # breakpoint()
         self.assertEqual(
             [(f.name, f.line) for f in ve_frames], expected)
 
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py
index 6e12e82a7a0084..838b2764a8cf4a 100644
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -2918,6 +2918,9 @@ def test_unicode(): """
         Traceback (most recent call last):
           File ...
             exec(compile(example.source, filename, "single",
+            ~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+                         compileflags, True), test.globs)
+                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
           File "<doctest foo-bär@baz[0]>", line 1, in <module>
             raise Exception('clé')
         Exception: clé
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 106baf959a6898..737c9d1afb5bf8 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -2045,6 +2045,7 @@ def test_multiline_not_highlighted(self):
              """,
                 [
                     '    1 < 2 and',
+                    '    3 > 4',
                     'AssertionError',
                 ],
             ),
@@ -2052,7 +2053,7 @@ def test_multiline_not_highlighted(self):
         for source, expected in cases:
             with self.subTest(source):
                 result = self.write_source(source)
-                self.assertEqual(result[-2:], expected)
+                self.assertEqual(result[-len(expected):], expected)
 
 
 class SyntaxErrorTests(unittest.TestCase):

From e4b978ddc34f793fea18e6145996b4269f7fd69f Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Wed, 20 Sep 2023 10:13:03 -0700
Subject: [PATCH 12/14] fix smelly symbol

---
 Python/traceback.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Python/traceback.c b/Python/traceback.c
index 7e71d417f0306e..e78e5b15a3889b 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -651,7 +651,8 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
  */
 
 // helper functions for anchor extraction
-const char *_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno, Py_ssize_t *size)
+static const char
+*_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno, Py_ssize_t *size)
 {
     return PyUnicode_AsUTF8AndSize(PyList_GET_ITEM(segment_lines, lineno), size);
 }

From 26fd2d9a0c3f6486eaa231d7c5976d7e745be851 Mon Sep 17 00:00:00 2001
From: William Wen <williamwen@meta.com>
Date: Thu, 21 Sep 2023 15:41:33 -0700
Subject: [PATCH 13/14] small fixes

---
 Lib/lib2to3/Grammar3.11.4.final.0.pickle      | Bin 15313 -> 0 bytes
 .../PatternGrammar3.11.4.final.0.pickle       | Bin 1225 -> 0 bytes
 Lib/traceback.py                              |  73 ++++++++++--------
 Python/traceback.c                            |  50 ++++++++----
 4 files changed, 78 insertions(+), 45 deletions(-)
 delete mode 100644 Lib/lib2to3/Grammar3.11.4.final.0.pickle
 delete mode 100644 Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle

diff --git a/Lib/lib2to3/Grammar3.11.4.final.0.pickle b/Lib/lib2to3/Grammar3.11.4.final.0.pickle
deleted file mode 100644
index c3acebc758875b5438cfebaab64116d80c0d1c9b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 15313
zcmc&*X>eS}amL^xKoGp|`{wcjK=8irbNG1Q7qkQxoE-uS2)iIf%aSG8y6m+qTPt6Z
zY|FAK*%l?+KV110SEZ6v@-L}M<yR`HN~MzgNL5n#5m%~GseE70%-gpB2vUa~g0J73
zo}QlWo}Qk0v#+14d-Kn?SINI;N~vOV{$B6RzMl5nVDHV$K#8ca-PxW@M>f|#SSUqR
zs@Raub#-K(>K`aY%9je6JnE`du`!*?rStjhtz0RpQN=Oofm=OU*42`7;MQPoCWlg;
zDvnJLWDCagsaM4&G|LPWI`V~H5R6g9=5+pEuCt?iFxT0Y=`KYLY$GT~(x{5H=|W#`
zDH_Xu2XBRbn?QJTAd|lBd5uHYojs@(Hq9*c_4apk_d$X2kWorm^e_Q@T%iEfqluxe
zufG&c0#nvO>U<xRn+yS2?i++LoIq0ql`CX(gBg#S3XWZw&c1<kA=6cgrj?8Ad^&jg
z+P;B&DVhP%x-vb!?Mzi1*Ol!o<THid^lb_>OBKg=^%cOXBc1QarF%1_XtpX&sKkNM
z9GKH19o1ZjmdSPYb!Bt6I?!cLDVm4QGk4NGVPNNz<f+a~f1v}$8q9<D0*IS=D%0r&
zS_p%?VP7N_{R6&YG2~{28|fvgSl^xQEKtLxAnfkxyXy&;fe>c*rRA`1Ji03&@~xh}
zn-IuZGlkl0w=b-O=2;jU{t;`w3Yu5qp!sUB@yPB0`o@|v!kJ=i8R0r=K}S95-kahb
ztq_=Mc#F58UpDKadL7Qo*FlL~U%{8!!LF~Tt0U&H9@RuR;tgO9g<WAIOb!PEKlm4f
z-Gt8i;g?>?&8k?}-*=ZIy@iwm*<6?oTTvcJXY;|YwxQR7Okr>!=i6=vLEdD$3w}cF
z06`vxz{GW0?u0kxG5Y11ybGe`d$OGwy7z9j?@jjv>+gYR`9gZYv)hZIcYOA-V(@0Z
zb0FJ~dAc876Bph1AAlP9!E6BzbP&y;Du&^ZJTVX)hJ9!bM|=eALTOzoItq57*l`O+
zFGa_asC>E`Y_%#*j;Rt-9*5@@y8C4yBWQ<b*UJfrRB*$8l6aW1hh{!SSqAQT6P;#H
z_xdwkh*K~Og*^i^-oc2)h_mS8j_=|eSmD`Go<|pVTo)H$%e#g#)0yldbiB)02b;hY
zmqO9iUWN=(i+)Pc6_lQW!?<~N6{PpF8F&*UC`H%6*cDy%^|<U<-B5$YW*4~IT^MKl
zM#wfbh}5PQ0ov3dESq|SV>1R(*fd1-$}}R}nz0D5rYUMvW*h>lX^xtd86S;PW<u1g
z%tS;=GYJvWOh#NZQxFf$R0KgYEt;at^k}LwGZ5a)OawJE3z5vsj%F$|2QkacMUXP{
z5R}aPXs$8~5PHnQXudLw5Mj*XXrVGoqD9IqjTS4jELx(>as&>uB3h<QDq610%4mf$
ztD=-LtD}|5tcg}Bvldg`v|xIh)@ZFVZBdId>!Ma=+M_mQ)<^4<*$}lWvoTt)%%*6A
zGMl4~%51@;Gh3t0%51|VGuxxB%It`?DYG-$uFS4zhcdgPoyzQqb}6$L)57eF_9(MI
z+N;cgXrD3%;qvBCbU>NIaBp)2o^6i8sm-zIh%y=;Y>vZcO$0ABC!*uZoQxu6PDLk_
zIUSu;<_x^foQ+N^a}Mrh&PQjJxe%RG=3;bSnM=_HWiCe-mAL{(F;}C@%3On2nCsD1
zWp2O)Oh<HGnJ1zf%A^O2^>BYoh0^s>%Kg_$tCvc;T2}$q5N58IbQO_lJb_BDk^-Lf
zYN3+KjIUf1SFRmi+2Sj8tF8t=vS`ya=%|WicGRkCr2-f}c&6)OrtKtj;;>Fjjc@LV
zQ@W1Sdc8pD$T3Dg(&GlHXcM@xviB+$L#LZaAuZ}flQF~f8Rtn7`m|Sa99f@bq1|S0
z2d6>kIaH_g7ExEI^wwY_F_x!I=~_{wK?ui6=S?)7UZU#&SLk}cl)#mGj2Jnkw@G=s
zZb01*LRzRnH+p?{O7$*1)>Z7*O}=7}RP5E`TtyX&Vj*D-C0qxNKPte}QD|XFOjE!J
zuULzHqQ!oJ2Lv7zcu3%3fky-$6?jab7I<7B<_okrK}c;XJhquw_FjP<AFwO@DA1Vn
z#ugfo-ULNx#FLbnOixik@(~I?2!Z3wDmG%fDI(0WFk+|0uRtz%lAj5FewIA|lhWtd
z3nWC@DL~UXhI)Ds^?WSqxk^V%x^7Bul>RReI$qR22zOB`FA2OX&|$8~^HqV@1fCIi
zU3_ac`zte}#IvP3p1VRFB)-8q(W_(Fyh-UNq-9#*O@UhkZWh=nuuEV@;8wzN=S~wn
zfmmn!vVOYTV^jK;2%Z)wbI5UE=<ysH0|yh^)ww|R!(70{C5)g@uyf}Eo?$lbT%Mf6
zh1fVL^mf|!hU&5~DIc<|$lDsseZzZ7O1FzMZuD;A0%pLr&@(aY&V}4av5qr5V1rXz
zPIQDU!XfSkbwb3VI0yhJWA=m2MTyW~6<gDsHy8|Ph8ErI2i)~q6{19|P<Z>8*gm&K
zO}BQS`HC<_l8&r<V)FK3UW6vW#Tp)7=|&)ocW>N8N%z$(Q;p<6=v+)C%ZFn@1z=Fd
zhuZ>UzYi7zU1H6e?3utq9AmaVGjhAKP2C8=<ed5<RU!uT#ZvVTSNF-dvP~2lkgoC+
zC?Won;z2vgAqLurWW7SZgHZHzOq_GLkxM}j*h*VMZwu&^V4kQ4mhKr^)=SGV0+$Lb
zNIf(h^u}>5;t+L5sO}1UO5i<#Umyfq*KHWpII70;N-Rwa_WIca_jGX<G29t9rJt5L
zBa0YWw$R1d=9FIu(Fs>Xr-h!#u2~&c3;ji!OU(C-*jc(Dr+EAl>xJ)KmdIGmX`<Au
zpj6;YhoG&KRxB?8&jypV`2Ha<{f-{?C29Ax46}HnQ_Q<lEKX92#58)WDft|`kj{qg
z#iXWE=&3QW+n|`(!(4fW7%pJ76oq%Ks8A>I?0G^e@_bl+!^XpJH2et7@SW)wf`yW^
zI;FoX#`y{%xk=~=qdPMew1tiJi(b$;9Kt=^G)BAfEKliYh3U{)5%!g?D^hVHT7hxg
zaFNc16*rYY6xNE?SfC9eP#h>_VStSw2i?!<9@gTQNK56`l%=KE-a-%6^1SGTb`R0=
zWzq5#!eOR+HJ0?N!{btIT)9ou_*$^58yfT|_#ogMPhK%NDZQL%T@1>}gSg3hveRD|
z*0Eg&gEGmH(sN@;UK4V$qw9w%g&<xI#vvBAixY;~lmaFDj2F&U@04uF)i!K`Nys(X
zt(<-U=EXvcys}KK^pMsNv_eE-C-kJacQ+F}_xUmRC%r}D@k%(sH)5*SWxQS+K5E~L
zNxmhFs{N>?u<N1nQtuoA#n=aBHPvrOlM-1t5<Jjfr8W$7coU?RobSivWl>Z5O>)uS
z7Wf^3-xc^hf!`PSmcX|KzC&2<)!U_xSc(vqV%RU)%Nx>mh{UdK3NwNtg!jml{()FP
z+y!i0cY!K)!Sd90<j#A%I6w5_a4?g)r?k{K;o`;985QPpiv5V)yXj9Apj3@__mqBD
z*hx8rk-@mHAImV;34BlBPXsm!beaU6e=0h^68dzJl7VWvC?Bi&N|<S~;!qeDw!EU{
z7&cT<si{!Zw{+2vXAhMU{l3EN`Pe>{BI@(r&gB^gyHhV$1M$kiI^}jnP6eT<WL8mM
zp>=qDzaLEf{;=V#h>ssUMBJ2?nhM^Dh25=b!CN6`>_f@vE3q1{?DR=FNf1@GNx(VD
zNQ5$&LT`@r&-^~hS%Mg$<ik#(VQUl?c#+FX%c<D3KNsG=2)tVYOL?0ir&A;B$=oiD
z+c&<RBv?Qa|5DQ8Pb|>hy`7{yoqH*9l9dmNsPG3AN)&bx_P%UDVw2tXqR|}~)!Tfm
z4~^D~)$UUOXOIM|5UkmjB8MGkr2y~AdzOd+3iJUE7^xt?ld6p`S!F$|$PacR=kZd3
zcQxO$TYLOTO;||S<pdBqaX-)#r4gF?Mw3v!uO|x@bRJ9AZF-7e(L%66;i-Z}y99?S
zazGj4hm)grP1bFCnvkQBs|H=XdPtP^l6^R2G#*rr#)HQijR&83G(?O))M$J-;%Geh
z1f%gle}1Fkg296jmp&?o48QDD#H9}(8keL7CPb^SCWZHQV7V^Sao{TpL!21x{pORs
z*_79xA#XM}%G#6zpk5Rt_?XiAX~o4~UbN`2;YACQGYWdiZ-;gxZZGsNeRWFzYJ{a$
z-jKBH*W8rw=1uNU^x~MsuO-gJVX1;@N07tYHB+qZ>Q3jwZ#do0AWmRdc|NZ3O0Mxj
zj%cqm4#5!b6^>?=9)}C3r5b0WJYM5lug0;J#%Tr*FEzx~cx&a`DBjkT#v6sk`zwzE
zH8$_uWovA5HMXLR{2W!<l^WngTIr>@mf^yVUjE<<+!*47TIrQq4Zg&00!(iZH7RBg
zF|Z*EHcvFg?^F5Fno`P9n&X$Pd(UvVRUUHSC$1L*Gn#BYu94)S2_C^1Ka{=pIkCde
zZEpuwBS|xgdias<J*bBvrh53b?qifN7%bsuyWf;c`2Fs;p%Z#Y()2@2j$irynWXzb
zwjb94TnBL-!gUze5nM-c9m9p=dVL%hj_P?(&%ws;gTF(+a5+kW9}52mB{&~T<CUGD
zs;bDHt_MlkVuG>@1xp_!*+gYmCCKQRo^+D3ZNBCru$ORM#&rc3PPz3pTsX7V*KuJ0
z$$YZ1`+W<}luu~jDas!8tvRPShd6&IJf%v`2QT4NWiLnzPBtntP1&ms0S-h=SGKDH
zF+<tG3dBrhpLPgLR5qEV><fY@PxsH@yR()3$g#)Bll>fJ|EK~nSJ^+VK+IG2Pb(1f
zmHkUWIEh9w>toEK1<L-lW1Dl5EL8R%90K8qqAgPPKOWJ~DDz{nvj3H6U56q2l#(t{
zwz1lI?nnVQIh_~~m*W~Tt(Gb~HNjydIY!ZBnX>bJYX)-$@(`2}i0SCdm0cke^iG;?
zg|a&x0xnBLO4*a12LnEWJlm~Q_L3v>vu~BMw;jT}$7=Xz1!9e|&pHGa4{~0sY{?<8
zf)LT7>|26J$@&5;yJ}VTT}KArAX%HT4?NjuR(%XlS_h9!a2m<FkHMu~*}r%Mmq)A%
z>y`bh3Kcgf`*)9E`51z2RQ4Yq!Q~NxZBq7MonWvuZNFLB|8xkf@<eP=_IHUshOQDL
zxb#+K>uNYDxJe+3ZOSfi2skGZ+m&s12y7pS*rDt;K|DNej4}&$D!Vt)`eE}qY<W0K
z!d542a~`$DNp5RKP<fZK$48NXi$sV8K?*J!zweC~W6h9F*KTFcJJYu~)9+FC`Y56g
z75n#KyuHd=ClW@E-mp*ECr1(K|I?a@J?MUA?<S;7#uk`19`DDP#s`#r`Sam)P}$cL
zoc>TVVTg|%QudvM6iLejcN%3jA6EALN3?znn;ud2H=mu$QDuMo*|{83_RkVr9yThE
zp_NwlZxftGvg~7UIj-zKCAfSBc_L;1J;4d_h70-$W&e9P_N21awX!Vu4a6yB#|gp(
zeZR7h!D(!394WTaj0k6x-IXAv#N6rn{?01<>ceZ#Df>5xTHofpvVT7udqLTM5iD7=
z<BQ7vn?v9QmxxQs{%@k)ryPOH$~M%AE%44vwpXwXbO;<W5OG!6IfD39E8;a}7bRLh
zY=pV`J%+zrS9YZn4$B~gyP@p5Bm$oo_M~rSdgSv$T~(|*7@azIxx^#D69BaU2<m(s
zLoktGJi!El83gSFjRdm@@b#?w%%4o~#jJd$@1NxpefNwH^xbFlu2{n_dZhuLdfXGf
zM9*}3B){aB>RS2c+x6o<CU^MitZ!fMz72O+D2K1U)2@I|x^q2!ooUa$248r4p8P`F
zJ=V!5+5TBR$9B*4?o(`6sKE!to<u$ZcF(o^#Mh(c6J9>grb!kOY$e!2Am*&*Qw;!i
zP<))i$98#C#TukvuyqrWhXpyEk9G1mj`T|jwi7v?NPMG8xI*A^ftLw&1JTC>aa|B&
ziD(nVQ9;ZhVwOCvWQ$b<tpqCwmJuu`SVxc|SWD1Cu%2KI!D;{;$PgYBxI|d0Vs$nL
zU2vels)+(O2%I1U^VoPQ!F+<b1k(r>5U@Fi2Jek1;*(^;MuE)&rwLps@U*~-0%r(p
z5I9%hbb)&b$rh(>U^QEiM?A9LWk;~$s-PybyW>JrBd|%}7J*9zjuSXl;5LEV1>PV;
zN41z-8FJhth|T1<u3VR;+v00yqE`!?BXEttwF1uyoGfs?z>NZ{38{-r4uo1V87uIV
zR4pN*ULF_oagRJU^Kpuh?G!jh;7oxF1uho2Okhf2i@?nSw+h@Xa6h50Bd8~sNid0^
z4FGS5geL`_5EuzOKnT&tbZ2u&N#JzCm*S*7C-8#6^8zmkRD^mFNv`oRQ*5}Mxp%j3
zpi7QVZC!m8emb&acx$$eyp6X_yiKspyiK$dc$;J=@iy5`;cbeY#@kdogSTmR7H`w-
z9NuQwdA!ZE3wWDl7x6aRF5zvCUB=s7yMniQb|r7~?P}f@*tNVZw5_}?vg>$TY<Kdu
z)^_rC#`f`c-9F3PZTkXmIr}<qPun+n`;vW^x3Ahi;qB)L)P=s=nVcxgV|kntSL1xR
z7Ux%WILE5TE6Es~_cq{nrbhgY#c@Cr&diqJe0M26OkRc$m6yxHTaDw`8l2=#!MEW}
zI7yw3-+acQ)lw|X+w}%;*oM|SaaOzsRQvEP`F{LLbO7t>L2x+)E{E|=_YrV8s@LMs
zumy*P>v3qf28V{LacDSCU&I-~C7dr`#%tsioJC*7x!N_HPxtH1`WZ;{8qS(u$ARLv
zAo-6V`MX%kevB8p_aMnna2EZmK_s%c95q>-Bt^|7r|6oBBpAIak*K5BFck$k9lEtx
zPDIn|63JkCqt8~+o63nvdUGOWNpJC)9FlLLw+>BK(c3E$H1v*SGLhbuOm)$_hh%r@
zz2$5<eKMrb=u?T*Dt$W9p`^m-v&oD#eJ+uDq|cW#+w=vWxutJ-ByxUqM@2rJegdWl
zS#mm^%v;kp%c)qpGm$c*yDBnWbfzMoO?MB?;nTM&^U}15yFrxF*<{X<w!@O$bWb96
zNcSc(Wpv+&8A1BVWJ01INTvzue8{%ag*ahN4_2fO>N`yGOJ?-xyA_F8`l-r1EPXFH
zfb)<qR3v=qFAmEl)6Wh|Y}3yT&ui1q56|+`FI1+w=`Y7+$wSg#smxu{FIHqQ>X#}L
z*YwNfOf~%q<_Pi{-Gq5{Xj+v1n$Nt{#d0#0{yN;LoYJYkQISxlzd1BrQom7|p{7ff
zDWLkzL^7oQb}|!De}|=zeyG2j%rVs8tIQ+R-;b^0hUBee&Yga{BF|30Q%;G~KS(CJ
z=^vJp1ocltHkrO(PBPQ)$H`~<gP}=r`e#FvEA=npL^AzhNFCD;${A()qh!{Y{@9I(
ZB!1~%Cey<7uLlt=kgJD$wJJ5(@_*S)Y#0Cl

diff --git a/Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle b/Lib/lib2to3/PatternGrammar3.11.4.final.0.pickle
deleted file mode 100644
index 67e7d83e9194d5e1ddc268c53ef5873b98871f01..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1225
zcmaJ>J#QL85WT%`Fkp-UTeecAxgtv8I!&acnw%9yNs)r$2%8)-hKRdM<N_&8Ty44e
zJNa#Sv%6qO0me@|UvFlI+4&Xy`5SuXv(cIARuA{n)#7H^+)w3Nvnt7p`b!BR_k=om
zy{P4SS=93%Qs+Xb&Yq&uxi3`mM%KlA0U?0@t^8Kha`tICuXP>@m41-lB{ZOmgbJ-b
z7LDrKlsC2j&WwCzfE;BA+A;!N8H1us5Lu>qEXr1+;;IH)r6)QwUlJr9burQ8lWz~6
zIY{e4Dch;V$f|vrJfpRq)OJJ}osfr+17pV-k<Ml5Qm{?O2S*a^NCI}Gn7t^W&}2t+
zWP&p4u?jh0Mko#t`4*?<VA(S2Gaa2795A8vbvHhDC1f--Y$Nt#k8_uXjK<ZLQDAt&
zV}+r@*i|^D1i5XU7nJT*o6%Nw*Rn+;@g@7=8vo~NA$r#YZ|8JK4ksz43TI!7YC8|T
zjpd?{d&kw`KOxaP@-&vCBWBFdvm<6q*Yk}g{IC_cFlH|xAp=X8w^tmB_H*-TU=v~j
z-|aI#)q=TP6jO;uFhQ87qPO|`kDoQDd~wU2V@McU3_=CVRSh#4YXBW8V2lk4qaLv9
zkYU88z$jCe`wTsXD+ZsT!*I<o2H*;OYy+f9@8rYJ)q2KVEq_x~skmO<$))KsM$bn_
z13VG#F$NaW0B>N3-i)Y=2Xl%>^f3u%bOno#@#?^O4eJ<xZMtk!$L$f*1+zvD7X_|-
iuo@_JyMS7UJBHd%3x#@n>w_*D72)x(u~$6Ny!;2V#C%Zz

diff --git a/Lib/traceback.py b/Lib/traceback.py
index 816b021c28370a..ec0187abbd89f6 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -319,6 +319,15 @@ def _original_line(self):
         self.line
         return self._line
 
+    @property
+    def _dedented_lines(self):
+        # Returns _original_line, but dedented
+        self.line
+        if self._line_dedented is None:
+            if self._line is not None:
+                self._line_dedented = textwrap.dedent(self._line).rstrip()
+        return self._line_dedented
+
     @property
     def line(self):
         if self._line is None:
@@ -329,10 +338,8 @@ def line(self):
             for lineno in range(self.lineno, end_lineno + 1):
                 # treat errors and empty lines as the same
                 self._line += linecache.getline(self.filename, lineno).rstrip() + "\n"
-        if self._line_dedented is None:
-            if self._line is not None:
-                self._line_dedented = textwrap.dedent(self._line).rstrip()
-        return self._line_dedented
+        # return only the first line
+        return self._line.partition("\n")[0].strip()
 
 def walk_stack(f):
     """Walk a stack yielding the frame and line number for each frame.
@@ -476,49 +483,47 @@ def format_frame_summary(self, frame_summary):
         row = []
         row.append('  File "{}", line {}, in {}\n'.format(
             frame_summary.filename, frame_summary.lineno, frame_summary.name))
-        if frame_summary.line:
+        if frame_summary._dedented_lines:
             if (
                 frame_summary.colno is None or
                 frame_summary.end_colno is None
             ):
-                # only output first line
-                row.append(textwrap.indent(frame_summary.line.partition('\n')[0], '    ') + "\n")
+                # only output first line if column information is missing
+                row.append(textwrap.indent(frame_summary.line, '    ') + "\n")
             else:
-                all_lines_original = frame_summary._original_line.splitlines()[
-                    :frame_summary.end_lineno - frame_summary.lineno + 1
-                ]
-                colno = 0 if frame_summary.colno is None else frame_summary.colno
-                end_colno = len(all_lines_original[-1]) if frame_summary.end_colno is None else frame_summary.end_colno
-                # character index of the start of the instruction
-                start_offset = _byte_offset_to_character_offset(
-                    all_lines_original[0], frame_summary.colno
-                )
-                # character index of the end of the instruction
-                end_offset = _byte_offset_to_character_offset(
-                    all_lines_original[-1], frame_summary.end_colno
-                )
+                # get first and last line
+                all_lines_original = frame_summary._original_line.splitlines()
+                first_line = all_lines_original[0]
+                last_line = all_lines_original[frame_summary.end_lineno - frame_summary.lineno]
 
-                all_lines = frame_summary.line.splitlines()[
+                # character index of the start/end of the instruction
+                start_offset = _byte_offset_to_character_offset(first_line, frame_summary.colno)
+                end_offset = _byte_offset_to_character_offset(last_line, frame_summary.end_colno)
+
+                all_lines = frame_summary._dedented_lines.splitlines()[
                     :frame_summary.end_lineno - frame_summary.lineno + 1
                 ]
+
                 # adjust start/end offset based on dedent
-                dedent_characters = len(all_lines_original[0]) - len(all_lines[0])
+                dedent_characters = len(first_line) - len(all_lines[0])
                 start_offset -= dedent_characters
                 end_offset -= dedent_characters
                 start_offset = max(0, start_offset)
                 end_offset = max(0, end_offset)
 
+                # get exact code segment corresponding to the instruction
                 segment = "\n".join(all_lines)
                 segment = segment[start_offset:len(segment) - (len(all_lines[-1]) - end_offset)]
 
+                # attempt to parse for anchors
                 anchors: Optional[_Anchors] = None
                 try:
                     anchors = _extract_caret_anchors_from_line_segment(segment)
                 except AssertionError:
                     pass
 
-                show_carets = False
                 # only use carets if there are anchors or the carets do not span all lines
+                show_carets = False
                 if anchors or all_lines[0][:start_offset].lstrip() or all_lines[-1][end_offset:].rstrip():
                     show_carets = True
 
@@ -534,7 +539,8 @@ def format_frame_summary(self, frame_summary):
                 if anchors:
                     anchors_left_end_offset = anchors.left_end_offset
                     anchors_right_start_offset = anchors.right_start_offset
-                    # anchor positions do not take start_offset into account
+                    # computed anchor positions do not take start_offset into account,
+                    # so account for it here
                     if anchors.left_end_lineno == 0:
                         anchors_left_end_offset += start_offset
                     if anchors.right_start_lineno == 0:
@@ -548,19 +554,22 @@ def format_frame_summary(self, frame_summary):
                         range(anchors.right_start_lineno - 1, anchors.right_start_lineno + 2)
                     )
 
+                # remove bad line numbers
                 significant_lines.discard(-1)
                 significant_lines.discard(len(all_lines))
 
+                # output all_lines[lineno] along with carets
                 def output_line(lineno):
                     result.append(all_lines[lineno] + "\n")
                     if not show_carets:
                         return
                     num_spaces = len(all_lines[lineno]) - len(all_lines[lineno].lstrip())
                     carets = []
-                    for col in range(len(all_lines[lineno])):
-                        if lineno == len(all_lines) - 1 and col >= end_offset:
-                            break
-                        elif col < num_spaces or (lineno == 0 and col < start_offset):
+                    num_carets = end_offset if lineno == len(all_lines) - 1 else len(all_lines[lineno])
+                    # compute caret character for each position
+                    for col in range(num_carets):
+                        if col < num_spaces or (lineno == 0 and col < start_offset):
+                            # before first non-ws char of the line, or before start of instruction
                             carets.append(' ')
                         elif anchors and (
                             lineno > anchors.left_end_lineno or
@@ -569,6 +578,7 @@ def output_line(lineno):
                             lineno < anchors.right_start_lineno or
                             (lineno == anchors.right_start_lineno and col < anchors_right_start_offset)
                         ):
+                            # within anchors
                             carets.append(secondary_char)
                         else:
                             carets.append(primary_char)
@@ -673,9 +683,10 @@ def _extract_caret_anchors_from_line_segment(segment):
     import ast
 
     try:
-        # Without brackets, `segment` is parsed as a statement.
-        # We expect an expression, so wrap `segment` in
-        # brackets to handle multi-line expressions.
+        # Without parentheses, `segment` is parsed as a statement.
+        # Binary ops, subscripts, and calls are expressions, so
+        # we can wrap them with parentheses to parse them as
+        # (possibly multi-line) expressions.
         tree = ast.parse("(\n" + segment + "\n)")
     except SyntaxError:
         return None
diff --git a/Python/traceback.c b/Python/traceback.c
index e78e5b15a3889b..dffe86cd65875f 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -650,13 +650,16 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
  *  TypeError: 'NoneType' object is not subscriptable
  */
 
-// helper functions for anchor extraction
+// The below functions are helper functions for anchor extraction
+
+// Get segment_lines[lineno] in C string form
 static const char
 *_get_segment_str(PyObject *segment_lines, Py_ssize_t lineno, Py_ssize_t *size)
 {
     return PyUnicode_AsUTF8AndSize(PyList_GET_ITEM(segment_lines, lineno), size);
 }
 
+// Gets the next valid offset in segment_lines[lineno], if the current offset is not valid
 static int
 _next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
 {
@@ -679,6 +682,7 @@ _next_valid_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offs
     return 0;
 }
 
+// Get the next valid offset
 static int
 _increment_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
 {
@@ -686,6 +690,7 @@ _increment_offset(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offse
     return _next_valid_offset(segment_lines, lineno, offset);
 }
 
+// Get the next valid offset at least on the next line
 static int
 _nextline(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
 {
@@ -694,6 +699,7 @@ _nextline(PyObject *segment_lines, Py_ssize_t *lineno, Py_ssize_t *offset)
     return _next_valid_offset(segment_lines, lineno, offset);
 }
 
+// Get the next valid character that is not a backslash or '#' and that satisfies the stop predicate
 static int
 _increment_until(PyObject *segment_lines, Py_ssize_t *lineno,
                  Py_ssize_t *offset, int (*stop)(char))
@@ -721,6 +727,7 @@ _increment_until(PyObject *segment_lines, Py_ssize_t *lineno,
     return 0;
 }
 
+// is the character a binary op character? (not whitespace or closing paren)
 static int
 _is_op_char(char ch)
 {
@@ -750,6 +757,8 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
 {
     switch (expr->kind) {
         case BinOp_kind: {
+            // anchor begin: first binary op char after left subexpression
+            // anchor end: 1 or 2 characters after anchor begin
             expr_ty left = expr->v.BinOp.left;
             expr_ty right = expr->v.BinOp.right;
             *left_anchor_lineno = left->end_lineno - 2;
@@ -786,7 +795,7 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
                 )
             ) {
                 char ch = segment_str[*right_anchor_col];
-                if (!IS_WHITESPACE(ch) && ch != '\\' && ch != '#') {
+                if (_is_op_char(ch) && ch != '\\' && ch != '#') {
                     ++*right_anchor_col;
                 }
             }
@@ -796,6 +805,8 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
             return 1;
         }
         case Subscript_kind: {
+            // anchor begin: first "[" after the value subexpression
+            // anchor end: end of the entire subscript expression
             *left_anchor_lineno = expr->v.Subscript.value->end_lineno - 2;
             *left_anchor_col = expr->v.Subscript.value->end_col_offset;
             if (_next_valid_offset(
@@ -817,6 +828,7 @@ extract_anchors_from_expr(PyObject *segment_lines, expr_ty expr,
             return 1;
         }
         case Call_kind:
+            // anchor positions determined similarly to Subscript
             *left_anchor_lineno = expr->v.Call.func->end_lineno - 2;
             *left_anchor_col = expr->v.Call.func->end_col_offset;
             if (_next_valid_offset(
@@ -859,6 +871,10 @@ extract_anchors_from_stmt(PyObject *segment_lines, stmt_ty statement,
     }
 }
 
+// Returns:
+// 1 if anchors were found
+// 0 if anchors could not be computed
+// -1 on error
 static int
 extract_anchors_from_line(PyObject *filename, PyObject *lines,
                           Py_ssize_t start_offset, Py_ssize_t end_offset,
@@ -935,6 +951,7 @@ extract_anchors_from_line(PyObject *filename, PyObject *lines,
                                             &flags, arena);
     if (!module) {
         if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            // AST parsing failed due to SyntaxError - ignore it
             PyErr_Clear();
             res = 0;
         }
@@ -1020,7 +1037,7 @@ static void significant_lines_append(SignificantLines* sl, Py_ssize_t line, Py_s
 
 static int significant_lines_compare(const void *a, const void *b)
 {
-    return *(Py_ssize_t *)a - *(Py_ssize_t *)b;
+    return (int)(*(Py_ssize_t *)a - *(Py_ssize_t *)b);
 }
 
 // sort lines and remove duplicate lines
@@ -1039,13 +1056,14 @@ static void significant_lines_process(SignificantLines *sl)
     sl->size = idx;
 }
 
+// output lines[lineno] along with carets
 static int
 print_error_location_carets(PyObject *lines, Py_ssize_t lineno,
-                              Py_ssize_t start_offset, Py_ssize_t end_offset,
-                              Py_ssize_t left_end_lineno, Py_ssize_t right_start_lineno,
-                              Py_ssize_t left_end_offset, Py_ssize_t right_start_offset,
-                              const char *primary, const char *secondary,
-                              PyObject *f, int indent, int margin_indent, const char *margin)
+                            Py_ssize_t start_offset, Py_ssize_t end_offset,
+                            Py_ssize_t left_end_lineno, Py_ssize_t right_start_lineno,
+                            Py_ssize_t left_end_offset, Py_ssize_t right_start_offset,
+                            const char *primary, const char *secondary,
+                            PyObject *f, int indent, int margin_indent, const char *margin)
 {
     Py_ssize_t num_lines = PyList_Size(lines);
     PyObject *line = PyList_GET_ITEM(lines, lineno);
@@ -1069,15 +1087,15 @@ print_error_location_carets(PyObject *lines, Py_ssize_t lineno,
                 has_non_ws = 1;
             }
         }
-        if (lineno == num_lines - 1 && col >= end_offset) {
-            break;
-        } else if (!has_non_ws || (lineno == 0 && col < start_offset)) {
+        if (!has_non_ws || (lineno == 0 && col < start_offset)) {
+            // before first non-ws char of the line, or before start of instruction
             ch = " ";
         } else if (
             special_chars &&
             (lineno > left_end_lineno || (lineno == left_end_lineno && col >= left_end_offset)) &&
             (lineno < right_start_lineno || (lineno == right_start_lineno && col < right_start_offset))
         ) {
+            // within anchors
             ch = secondary;
         } // else ch = primary
 
@@ -1248,6 +1266,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     }
 
     if (start_line < 0) {
+        // start line is unavailable (negative) - fall back to lineno
         start_line = lineno;
     }
     // only fetch first line if location information is missing
@@ -1285,7 +1304,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // When displaying errors, we will use the following generic structure:
     //
     //  ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
-    //        ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
+    //        ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
     //        |              |-> left_end_offset     |                  |-> end_offset
     //        |-> start_offset                       |-> right_start_offset
     //
@@ -1296,8 +1315,11 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
     // AST information or we cannot identify special ranges within it, then left_end_offset and
     // right_end_offset will be set to -1.
     //
+    // To support displaying errors that span multiple lines, *left_end_lineno* and
+    // *right_start_lineno* contain the line numbers of the special ranges.
+    //
     // To keep the column indicators pertinent, they are not shown when the primary character
-    // spans the whole line.
+    // spans all of the error lines.
 
     PyObject *lines_original_split = PyUnicode_Splitlines(lines_original, 0);
     assert(PyList_Size(lines_original_split) == num_lines);
@@ -1398,7 +1420,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
                 }
             } else if (linediff > 2) {
                 // more than 1 line in between - abbreviate
-                PyObject *abbrv_str = PyUnicode_FromFormat("...<%d lines>...", linediff - 1);
+                PyObject *abbrv_str = PyUnicode_FromFormat("...<%d lines>...", (int)linediff - 1);
                 if (!abbrv_str) {
                     goto error;
                 }

From bd8a837b0838ad76a049d7399505b4bc4dadbc35 Mon Sep 17 00:00:00 2001
From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com>
Date: Thu, 21 Sep 2023 22:41:46 +0000
Subject: [PATCH 14/14] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?=
 =?UTF-8?q?lurb=5Fit.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst  | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst

diff --git a/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst b/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst
new file mode 100644
index 00000000000000..a6dae826c45598
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-09-21-22-41-45.gh-issue-106922.qslOVH.rst
@@ -0,0 +1 @@
+Display multiple lines with :mod:`traceback` when errors span multiple lines.