Skip to content

Commit e09bbae

Browse files
author
erifan01
committed
cmd/compile: enable address folding for global symbols of shared library
Address folding was disabled in CL 42172, whose commit message said: "In shared library, load/store of global is rewritten to using GOT and temp register, which conflicts with the use of temp register for assembling large offset." In practice this conflict does not occur, because the instruction sequences produced when an access is rewritten to use the GOT look like this:

  MOVD $sym, Rx becomes
    MOVD sym@GOT, Rx
  If there is an offset off, there will be one more instruction:
    ADD $off, Rx, Rx

  And MOVD sym, Rx becomes
    MOVD sym@GOT, REGTMP
    MOVx (REGTMP), Ry
  If there is a small offset off, it becomes:
    MOVD sym@GOT, REGTMP
    MOVx off(REGTMP), Ry
  If off is very large, it becomes:
    MOVD sym@GOT, REGTMP
    MOVD $off, Rt
    ADD Rt, REGTMP
    MOVx (REGTMP), Ry

We can see that the address is calculated correctly in every case, and testing on darwin/arm64 confirms this.

Removing this restriction makes it possible to further optimize the sequence "ADRP+ADD+LD/ST" into "ADRP+LD/ST(offset)", so this CL removes it.

Change-Id: I0e9f7bc1723e0a027f32cf0ae2c41cd6df49defe
Reviewed-on: https://go-review.googlesource.com/c/go/+/445535
Reviewed-by: Cherry Mui <[email protected]>
Run-TryBot: Eric Fang <[email protected]>
Reviewed-by: Heschi Kreinick <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
1 parent 8a9e2d9 commit e09bbae

File tree

2 files changed

+132
-209
lines changed

2 files changed

+132
-209
lines changed

src/cmd/compile/internal/ssa/_gen/ARM64.rules

+44-88
Original file line number | Diff line number | Diff line change
@@ -837,35 +837,25 @@
837837
(MOVDaddr [int32(off1)+off2] {sym} ptr)
838838

839839
// fold address into load/store
840-
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
841-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
840+
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
842841
(MOVBload [off1+int32(off2)] {sym} ptr mem)
843-
(MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
844-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
842+
(MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
845843
(MOVBUload [off1+int32(off2)] {sym} ptr mem)
846-
(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
847-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
844+
(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
848845
(MOVHload [off1+int32(off2)] {sym} ptr mem)
849-
(MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
850-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
846+
(MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
851847
(MOVHUload [off1+int32(off2)] {sym} ptr mem)
852-
(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
853-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
848+
(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
854849
(MOVWload [off1+int32(off2)] {sym} ptr mem)
855-
(MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
856-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
850+
(MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
857851
(MOVWUload [off1+int32(off2)] {sym} ptr mem)
858-
(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
859-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
852+
(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
860853
(MOVDload [off1+int32(off2)] {sym} ptr mem)
861-
(LDP [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
862-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
854+
(LDP [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
863855
(LDP [off1+int32(off2)] {sym} ptr mem)
864-
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
865-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
856+
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
866857
(FMOVSload [off1+int32(off2)] {sym} ptr mem)
867-
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
868-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
858+
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
869859
(FMOVDload [off1+int32(off2)] {sym} ptr mem)
870860

871861
// register indexed load
@@ -930,41 +920,29 @@
930920
(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
931921
(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
932922

933-
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
934-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
923+
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
935924
(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
936-
(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
937-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
925+
(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
938926
(MOVHstore [off1+int32(off2)] {sym} ptr val mem)
939-
(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
940-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
927+
(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
941928
(MOVWstore [off1+int32(off2)] {sym} ptr val mem)
942-
(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
943-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
929+
(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
944930
(MOVDstore [off1+int32(off2)] {sym} ptr val mem)
945-
(STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2)
946-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
931+
(STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2) =>
947932
(STP [off1+int32(off2)] {sym} ptr val1 val2 mem)
948-
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
949-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
933+
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
950934
(FMOVSstore [off1+int32(off2)] {sym} ptr val mem)
951-
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
952-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
935+
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) =>
953936
(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
954-
(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
955-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
937+
(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
956938
(MOVBstorezero [off1+int32(off2)] {sym} ptr mem)
957-
(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
958-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
939+
(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
959940
(MOVHstorezero [off1+int32(off2)] {sym} ptr mem)
960-
(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
961-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
941+
(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
962942
(MOVWstorezero [off1+int32(off2)] {sym} ptr mem)
963-
(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
964-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
943+
(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
965944
(MOVDstorezero [off1+int32(off2)] {sym} ptr mem)
966-
(MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
967-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
945+
(MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) =>
968946
(MOVQstorezero [off1+int32(off2)] {sym} ptr mem)
969947

970948
// register indexed store
@@ -1013,93 +991,71 @@
1013991
(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
1014992

1015993
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1016-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1017-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
994+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
1018995
(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
1019996
(MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1020-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1021-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
997+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
1022998
(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
1023999
(MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1024-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1025-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1000+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10261001
(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10271002
(MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1028-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1029-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1003+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10301004
(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10311005
(MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1032-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1033-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1006+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10341007
(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10351008
(MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1036-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1037-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1009+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10381010
(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10391011
(MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1040-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1041-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1012+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10421013
(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10431014
(LDP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1044-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1045-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1015+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10461016
(LDP [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10471017
(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1048-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1049-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1018+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10501019
(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10511020
(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1052-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1053-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1021+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10541022
(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10551023

10561024
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1057-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1058-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1025+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10591026
(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10601027
(MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1061-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1062-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1028+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10631029
(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10641030
(MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1065-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1066-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1031+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10671032
(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10681033
(MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1069-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1070-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1034+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10711035
(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10721036
(STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
1073-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1074-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1037+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10751038
(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
10761039
(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1077-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1078-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1040+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10791041
(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10801042
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
1081-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1082-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1043+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10831044
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
10841045
(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1085-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1086-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1046+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10871047
(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10881048
(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1089-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1090-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1049+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10911050
(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10921051
(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1093-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1094-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1052+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10951053
(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
10961054
(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1097-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1098-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1055+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
10991056
(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
11001057
(MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
1101-
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
1102-
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
1058+
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
11031059
(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
11041060

11051061
// store zero

0 commit comments

Comments
 (0)