7
7
8
8
#include " llvm-version.h"
9
9
#include " support/dtypes.h"
10
+ #include < sstream>
10
11
11
12
#include < llvm/Pass.h>
12
13
#include < llvm/IR/Module.h>
@@ -99,23 +100,41 @@ void LowerPTLS::runOnFunction(LLVMContext &ctx, Module &M, Function *F,
99
100
ptlsStates->addAttribute (AttributeSet::FunctionIndex,
100
101
Attribute::NoUnwind);
101
102
}
102
- else if (jl_tls_offset != -1 ) {
103
103
#ifdef LLVM37
104
+ else if (jl_tls_offset != -1 ) {
104
105
auto T_int8 = Type::getInt8Ty (ctx);
105
106
auto T_pint8 = PointerType::get (T_int8, 0 );
106
- auto T_size = (sizeof (size_t ) == 8 ? Type::getInt64Ty (ctx) :
107
- Type::getInt32Ty (ctx));
108
107
// Replace the function call with inline assembly if we know
109
108
// how to generate it.
110
- const char *asm_str = nullptr ;
109
+ # if defined(_CPU_X86_64_) || defined(_CPU_X86_)
110
+ // Workaround LLVM bug by hiding the offset computation
111
+ // (and therefore the optimization opportunity) from LLVM.
112
+ static const std::string asm_str = [&] () {
113
+ std::stringstream stm;
111
114
# if defined(_CPU_X86_64_)
112
- asm_str = " movq %fs:0, $0" ;
113
- # elif defined(_CPU_X86_)
114
- asm_str = " movl %gs:0, $0" ;
115
- # elif defined(_CPU_AARCH64_)
116
- asm_str = " mrs $0, tpidr_el0" ;
115
+ stm << " movq %fs:0, $0;\n addq $$" << jl_tls_offset << " , $0" ;
116
+ # else
117
+ stm << " movl %gs:0, $0;\n addl $$" << jl_tls_offset << " , $0" ;
117
118
# endif
118
- assert (asm_str && " Cannot emit thread pointer for this architecture." );
119
+ return stm.str ();
120
+ }();
121
+ // The add instruction clobbers flags
122
+ auto tp = InlineAsm::get (FunctionType::get (T_pint8, false ),
123
+ asm_str.c_str (),
124
+ " =r,~{dirflag},~{fpsr},~{flags}" , false );
125
+ Value *tls = CallInst::Create (tp, " ptls_i8" , ptlsStates);
126
+ tls = new BitCastInst (tls, PointerType::get (T_ppjlvalue, 0 ),
127
+ " ptls" , ptlsStates);
128
+ # elif defined(_CPU_AARCH64_)
129
+ // AArch64 doesn't seem to have this issue.
130
+ // (Possibly because there are many more registers and the offset is
131
+ // positive and small)
132
+ // It's also harder to emit the offset in a generic way on AArch64
133
+ // (need to generate one or two `add` with shift) so let llvm emit
134
+ // the add for now.
135
+ auto T_size = (sizeof (size_t ) == 8 ? Type::getInt64Ty (ctx) :
136
+ Type::getInt32Ty (ctx));
137
+ const char *asm_str = " mrs $0, tpidr_el0" ;
119
138
auto offset = ConstantInt::getSigned (T_size, jl_tls_offset);
120
139
auto tp = InlineAsm::get (FunctionType::get (T_pint8, false ),
121
140
asm_str, " =r" , false );
@@ -124,10 +143,14 @@ void LowerPTLS::runOnFunction(LLVMContext &ctx, Module &M, Function *F,
124
143
" ptls_i8" , ptlsStates);
125
144
tls = new BitCastInst (tls, PointerType::get (T_ppjlvalue, 0 ),
126
145
" ptls" , ptlsStates);
146
+ # else
147
+ Value *tls = nullptr ;
148
+ assert (0 && " Cannot emit thread pointer for this architecture." );
149
+ # endif
127
150
ptlsStates->replaceAllUsesWith (tls);
128
151
ptlsStates->eraseFromParent ();
129
- #endif
130
152
}
153
+ #endif
131
154
else {
132
155
ptlsStates->addAttribute (AttributeSet::FunctionIndex,
133
156
Attribute::ReadNone);
0 commit comments