diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index c19cf71ba4c6a..c8c011d94e4a3 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -662,6 +662,13 @@ static bool canCreateThunkFor(Function *F) { return true; } +/// Copy metadata from one function to another. +static void copyMetadataIfPresent(Function *From, Function *To, StringRef Key) { + if (MDNode *MD = From->getMetadata(Key)) { + To->setMetadata(Key, MD); + } +} + // Replace G with a simple tail call to bitcast(F). Also (unless // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), // delete G. Under MergeFunctionsPDI, we use G itself for creating @@ -740,6 +747,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { } else { NewG->copyAttributesFrom(G); NewG->takeName(G); + // Ensure CFI type metadata is propagated to the new function. + copyMetadataIfPresent(G, NewG, "type"); + copyMetadataIfPresent(G, NewG, "kcfi_type"); removeUsers(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); @@ -815,6 +825,9 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { F->getAddressSpace(), "", F->getParent()); NewF->copyAttributesFrom(F); NewF->takeName(F); + // Ensure CFI type metadata is propagated to the new function. + copyMetadataIfPresent(F, NewF, "type"); + copyMetadataIfPresent(F, NewF, "kcfi_type"); removeUsers(F); F->replaceAllUsesWith(NewF); diff --git a/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll b/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll new file mode 100644 index 0000000000000..d35d777282730 --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/cfi-thunk-merging.ll @@ -0,0 +1,210 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 2 +;; Check the cases involving internal CFI instrumented functions where we do not expect functions to be merged. +; RUN: opt -S -passes=mergefunc < %s | FileCheck %s +; RUN: opt -S -passes=mergefunc,lowertypetests < %s | FileCheck --check-prefix=LOWERTYPETESTS %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux" + +@0 = private unnamed_addr constant { i16, i16, [12 x i8] } { i16 -1, i16 0, [12 x i8] c"'int (int)'\00" } + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @f(i32 noundef %arg) #0 !type !3 !type !4 { +entry: + %arg.addr = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 %arg, ptr %arg.addr, align 4 + store i32 0, ptr %b, align 4 + %0 = load i32, ptr %arg.addr, align 4 + %cmp = icmp sgt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 1, ptr %a, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = load i32, ptr %a, align 4 + %2 = load i32, ptr %b, align 4 + %add = add nsw i32 %1, %2 + ret i32 %add +} + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @f_thunk(i32 noundef %arg) #0 !type !3 !type !4 { +entry: + %arg.addr = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 %arg, ptr %arg.addr, align 4 + store i32 0, ptr %b, align 4 + %0 = load i32, ptr %arg.addr, align 4 + %cmp = icmp sgt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 1, ptr %a, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %1 = load i32, ptr %a, align 4 + %2 = load i32, ptr %b, align 4 + %add = add nsw i32 %1, %2 + ret i32 %add +} + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @g(i32 noundef %b) #0 !type !3 !type !4 { +entry: + %b.addr = alloca i32, align 4 + %fp = alloca ptr, align 8 + store i32 %b, ptr %b.addr, align 4 + %0 = load i32, ptr %b.addr, align 4 + %tobool = icmp ne i32 %0, 0 + %1 = zext i1 %tobool to i64 + %cond = select i1 %tobool, ptr @f, ptr @f_thunk + store ptr %cond, ptr %fp, align 8 + %2 = load ptr, ptr %fp, align 8 + %3 = call i1 @llvm.type.test(ptr %2, metadata !"_ZTSFiiE"), !nosanitize !5 + br i1 %3, label %cont, label %trap, !nosanitize !5 + +trap: ; preds = %entry + call void @llvm.ubsantrap(i8 2) #3, !nosanitize !5 + unreachable, !nosanitize !5 + +cont: ; preds = %entry + %4 = load i32, ptr %b.addr, align 4 + %call = call i32 %2(i32 noundef %4) + ret i32 %call +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i1 @llvm.type.test(ptr, metadata) #1 + +; Function Attrs: cold noreturn nounwind +declare void @llvm.ubsantrap(i8 immarg) #2 + +attributes #0 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { cold noreturn nounwind } +attributes #3 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"CFI Canonical Jump Tables", i32 0} +!3 = !{i64 0, !"_ZTSFiiE"} +!4 = !{i64 0, !"_ZTSFiiE.generalized"} +!5 = !{} +; CHECK-LABEL: define dso_local i32 @f +; CHECK-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type !2 !type !3 { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: store i32 0, ptr [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 1, ptr [[A]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret i32 [[ADD]] +; +; +; CHECK-LABEL: define dso_local i32 @g +; CHECK-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type !2 !type !3 { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i64 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr @f, ptr @f_thunk +; CHECK-NEXT: store ptr [[COND]], ptr [[FP]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.type.test(ptr [[TMP2]], metadata !"_ZTSFiiE"), !nosanitize !4 +; CHECK-NEXT: br i1 [[TMP3]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize !4 +; CHECK: trap: +; CHECK-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR3:[0-9]+]], !nosanitize !4 +; CHECK-NEXT: unreachable, !nosanitize !4 +; CHECK: cont: +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP4]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define dso_local i32 @f_thunk +; CHECK-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type !2 { +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 [[TMP2]] +; +; +; LOWERTYPETESTS-LABEL: define dso_local i32 @f +; LOWERTYPETESTS-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] !type !2 !type !3 { +; LOWERTYPETESTS-NEXT: entry: +; LOWERTYPETESTS-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +; LOWERTYPETESTS-NEXT: [[A:%.*]] = alloca i32, align 4 +; LOWERTYPETESTS-NEXT: [[B:%.*]] = alloca i32, align 4 +; LOWERTYPETESTS-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 +; LOWERTYPETESTS-NEXT: store i32 0, ptr [[B]], align 4 +; LOWERTYPETESTS-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +; LOWERTYPETESTS-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 0 +; LOWERTYPETESTS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; LOWERTYPETESTS: if.then: +; LOWERTYPETESTS-NEXT: store i32 1, ptr [[A]], align 4 +; LOWERTYPETESTS-NEXT: br label [[IF_END]] +; LOWERTYPETESTS: if.end: +; LOWERTYPETESTS-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 +; LOWERTYPETESTS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +; LOWERTYPETESTS-NEXT: ret i32 [[ADD]] +; +; +; LOWERTYPETESTS-LABEL: define dso_local i32 @g +; LOWERTYPETESTS-SAME: (i32 noundef [[B:%.*]]) #[[ATTR0]] !type !2 !type !3 { +; LOWERTYPETESTS-NEXT: entry: +; LOWERTYPETESTS-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +; LOWERTYPETESTS-NEXT: [[FP:%.*]] = alloca ptr, align 8 +; LOWERTYPETESTS-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 +; LOWERTYPETESTS-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR]], align 4 +; LOWERTYPETESTS-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 +; LOWERTYPETESTS-NEXT: [[TMP1:%.*]] = zext i1 [[TOBOOL]] to i64 +; LOWERTYPETESTS-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], ptr @.cfi.jumptable, ptr getelementptr inbounds ([2 x [8 x i8]], ptr @.cfi.jumptable, i64 0, i64 1) +; LOWERTYPETESTS-NEXT: store ptr [[COND]], ptr [[FP]], align 8 +; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = load ptr, ptr [[FP]], align 8 +; LOWERTYPETESTS-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 +; LOWERTYPETESTS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], ptrtoint (ptr @.cfi.jumptable to i64) +; LOWERTYPETESTS-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 3 +; LOWERTYPETESTS-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 61 +; LOWERTYPETESTS-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]] +; LOWERTYPETESTS-NEXT: [[TMP8:%.*]] = icmp ule i64 [[TMP7]], 1 +; LOWERTYPETESTS-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize !4 +; LOWERTYPETESTS: trap: +; LOWERTYPETESTS-NEXT: call void @llvm.ubsantrap(i8 2) #[[ATTR4:[0-9]+]], !nosanitize !4 +; LOWERTYPETESTS-NEXT: unreachable, !nosanitize !4 +; LOWERTYPETESTS: cont: +; LOWERTYPETESTS-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 +; LOWERTYPETESTS-NEXT: [[CALL:%.*]] = call i32 [[TMP2]](i32 noundef [[TMP9]]) +; LOWERTYPETESTS-NEXT: ret i32 [[CALL]] +; +; +; LOWERTYPETESTS-LABEL: define dso_local i32 @f_thunk +; LOWERTYPETESTS-SAME: (i32 noundef [[TMP0:%.*]]) #[[ATTR0]] !type !2 { +; LOWERTYPETESTS-NEXT: [[TMP2:%.*]] = tail call i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]] +; LOWERTYPETESTS-NEXT: ret i32 [[TMP2]] +; +; +; LOWERTYPETESTS-LABEL: define private void @.cfi.jumptable +; LOWERTYPETESTS-SAME: () #[[ATTR3:[0-9]+]] align 8 { +; LOWERTYPETESTS-NEXT: entry: +; LOWERTYPETESTS-NEXT: call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0Ajmp ${1:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s,s"(ptr @f, ptr @f_thunk) +; LOWERTYPETESTS-NEXT: unreachable +;