From 70ce91dd4632aea58bca0ea25cc83e1a4166fa01 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 25 Nov 2024 17:56:50 +0000 Subject: [PATCH 1/2] [AMDGPU][SplitModule] Fix unintentional integer division A static analysis tool warned that a division was always being performed in integer division, so was either 0.0 or 1.0. This doesn't seem intentional, so has been fixed to return a true ratio using floating-point division. This in turn showed a bug where a comparison against this ratio was incorrect. --- llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index 5d7aff1c5092c..6adb43a153999 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -1101,9 +1101,9 @@ void RecursiveSearchSplitting::pickPartition(unsigned Depth, unsigned Idx, // Check if the amount of code in common makes it worth it. assert(SimilarDepsCost && Entry.CostExcludingGraphEntryPoints); const double Ratio = - SimilarDepsCost / Entry.CostExcludingGraphEntryPoints; + (double)SimilarDepsCost / Entry.CostExcludingGraphEntryPoints; assert(Ratio >= 0.0 && Ratio <= 1.0); - if (LargeFnOverlapForMerge > Ratio) { + if (Ratio > LargeFnOverlapForMerge) { // For debug, just print "L", so we'll see "L3=P3" for instance, which // will mean we reached max depth and chose P3 based on this // heuristic. From cdecbbcbdb1618902179bba7f97947856977e83e Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 26 Nov 2024 16:03:31 +0000 Subject: [PATCH 2/2] static_cast --- llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index 6adb43a153999..6f1c45d92c8cb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -1100,8 +1100,8 @@ void RecursiveSearchSplitting::pickPartition(unsigned Depth, unsigned Idx, if (Entry.CostExcludingGraphEntryPoints > LargeClusterThreshold) { // Check if the amount of code in common makes it worth it. assert(SimilarDepsCost && Entry.CostExcludingGraphEntryPoints); - const double Ratio = - (double)SimilarDepsCost / Entry.CostExcludingGraphEntryPoints; + const double Ratio = static_cast(SimilarDepsCost) / + Entry.CostExcludingGraphEntryPoints; assert(Ratio >= 0.0 && Ratio <= 1.0); if (Ratio > LargeFnOverlapForMerge) { // For debug, just print "L", so we'll see "L3=P3" for instance, which