Skip to content

Commit 4cf6316

Browse files
committed
8351414: C2: MergeStores must happen after RangeCheck smearing
Reviewed-by: chagedorn, qamai
1 parent 8a5ed47 commit 4cf6316

File tree

8 files changed

+110
-4
lines changed

8 files changed

+110
-4
lines changed

src/hotspot/share/opto/compile.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,9 @@ void Compile::remove_useless_node(Node* dead) {
402402
if (dead->for_post_loop_opts_igvn()) {
403403
remove_from_post_loop_opts_igvn(dead);
404404
}
405+
if (dead->for_merge_stores_igvn()) {
406+
remove_from_merge_stores_igvn(dead);
407+
}
405408
if (dead->is_Call()) {
406409
remove_useless_late_inlines( &_late_inlines, dead);
407410
remove_useless_late_inlines( &_string_late_inlines, dead);
@@ -453,6 +456,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
453456
remove_useless_nodes(_template_assertion_predicate_opaqs, useful); // remove useless Assertion Predicate opaque nodes
454457
remove_useless_nodes(_expensive_nodes, useful); // remove useless expensive nodes
455458
remove_useless_nodes(_for_post_loop_igvn, useful); // remove useless node recorded for post loop opts IGVN pass
459+
remove_useless_nodes(_for_merge_stores_igvn, useful); // remove useless node recorded for merge stores IGVN pass
456460
remove_useless_unstable_if_traps(useful); // remove useless unstable_if traps
457461
remove_useless_coarsened_locks(useful); // remove useless coarsened locks nodes
458462
#ifdef ASSERT
@@ -627,6 +631,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
627631
_stub_entry_point(nullptr),
628632
_max_node_limit(MaxNodeLimit),
629633
_post_loop_opts_phase(false),
634+
_merge_stores_phase(false),
630635
_allow_macro_nodes(true),
631636
_inlining_progress(false),
632637
_inlining_incrementally(false),
@@ -651,6 +656,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
651656
_template_assertion_predicate_opaqs(comp_arena(), 8, 0, nullptr),
652657
_expensive_nodes(comp_arena(), 8, 0, nullptr),
653658
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
659+
_for_merge_stores_igvn(comp_arena(), 8, 0, nullptr),
654660
_unstable_if_traps(comp_arena(), 8, 0, nullptr),
655661
_coarsened_locks(comp_arena(), 8, 0, nullptr),
656662
_congraph(nullptr),
@@ -905,6 +911,7 @@ Compile::Compile(ciEnv* ci_env,
905911
_stub_entry_point(nullptr),
906912
_max_node_limit(MaxNodeLimit),
907913
_post_loop_opts_phase(false),
914+
_merge_stores_phase(false),
908915
_allow_macro_nodes(true),
909916
_inlining_progress(false),
910917
_inlining_incrementally(false),
@@ -923,6 +930,7 @@ Compile::Compile(ciEnv* ci_env,
923930
_log(ci_env->log()),
924931
_first_failure_details(nullptr),
925932
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
933+
_for_merge_stores_igvn(comp_arena(), 8, 0, nullptr),
926934
_congraph(nullptr),
927935
NOT_PRODUCT(_igv_printer(nullptr) COMMA)
928936
_unique(0),
@@ -1870,6 +1878,49 @@ void Compile::process_for_post_loop_opts_igvn(PhaseIterGVN& igvn) {
18701878
}
18711879
}
18721880

1881+
void Compile::record_for_merge_stores_igvn(Node* n) {
1882+
if (!n->for_merge_stores_igvn()) {
1883+
assert(!_for_merge_stores_igvn.contains(n), "duplicate");
1884+
n->add_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
1885+
_for_merge_stores_igvn.append(n);
1886+
}
1887+
}
1888+
1889+
void Compile::remove_from_merge_stores_igvn(Node* n) {
1890+
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
1891+
_for_merge_stores_igvn.remove(n);
1892+
}
1893+
1894+
// We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during
1895+
// the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between
1896+
// the stores, and we merge the wrong sequence of stores.
1897+
// Example:
1898+
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
1899+
// Apply MergeStores:
1900+
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
1901+
// Remove more RangeChecks:
1902+
// StoreI [ StoreL ] StoreI
1903+
// But now it would have been better to do this instead:
1904+
// [ StoreL ] [ StoreL ]
1905+
//
1906+
// Note: we allow stores to merge in this dedicated IGVN round, and any later IGVN round,
1907+
// since we never unset _merge_stores_phase.
1908+
void Compile::process_for_merge_stores_igvn(PhaseIterGVN& igvn) {
1909+
C->set_merge_stores_phase();
1910+
1911+
if (_for_merge_stores_igvn.length() > 0) {
1912+
while (_for_merge_stores_igvn.length() > 0) {
1913+
Node* n = _for_merge_stores_igvn.pop();
1914+
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
1915+
igvn._worklist.push(n);
1916+
}
1917+
igvn.optimize();
1918+
if (failing()) return;
1919+
assert(_for_merge_stores_igvn.length() == 0, "no more delayed nodes allowed");
1920+
print_method(PHASE_AFTER_MERGE_STORES, 3);
1921+
}
1922+
}
1923+
18731924
void Compile::record_unstable_if_trap(UnstableIfTrap* trap) {
18741925
if (OptimizeUnstableIf) {
18751926
_unstable_if_traps.append(trap);
@@ -2429,6 +2480,8 @@ void Compile::Optimize() {
24292480

24302481
process_for_post_loop_opts_igvn(igvn);
24312482

2483+
process_for_merge_stores_igvn(igvn);
2484+
24322485
if (failing()) return;
24332486

24342487
#ifdef ASSERT

src/hotspot/share/opto/compile.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ class Compile : public Phase {
318318
uintx _max_node_limit; // Max unique node count during a single compilation.
319319

320320
bool _post_loop_opts_phase; // Loop opts are finished.
321+
bool _merge_stores_phase; // Phase for merging stores, after post loop opts phase.
321322
bool _allow_macro_nodes; // True if we allow creation of macro nodes.
322323

323324
int _major_progress; // Count of something big happening
@@ -374,6 +375,7 @@ class Compile : public Phase {
374375
GrowableArray<Node*> _template_assertion_predicate_opaqs;
375376
GrowableArray<Node*> _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
376377
GrowableArray<Node*> _for_post_loop_igvn; // List of nodes for IGVN after loop opts are over
378+
GrowableArray<Node*> _for_merge_stores_igvn; // List of nodes for IGVN merge stores
377379
GrowableArray<UnstableIfTrap*> _unstable_if_traps; // List of ifnodes after IGVN
378380
GrowableArray<Node_List*> _coarsened_locks; // List of coarsened Lock and Unlock nodes
379381
ConnectionGraph* _congraph;
@@ -766,6 +768,12 @@ class Compile : public Phase {
766768
void remove_useless_unstable_if_traps(Unique_Node_List &useful);
767769
void process_for_unstable_if_traps(PhaseIterGVN& igvn);
768770

771+
bool merge_stores_phase() { return _merge_stores_phase; }
772+
void set_merge_stores_phase() { _merge_stores_phase = true; }
773+
void record_for_merge_stores_igvn(Node* n);
774+
void remove_from_merge_stores_igvn(Node* n);
775+
void process_for_merge_stores_igvn(PhaseIterGVN& igvn);
776+
769777
void shuffle_macro_nodes();
770778
void sort_macro_nodes();
771779

src/hotspot/share/opto/memnode.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3450,12 +3450,23 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
34503450
}
34513451

34523452
if (MergeStores && UseUnalignedAccesses) {
3453-
if (phase->C->post_loop_opts_phase()) {
3453+
if (phase->C->merge_stores_phase()) {
34543454
MergePrimitiveStores merge(phase, this);
34553455
Node* progress = merge.run();
34563456
if (progress != nullptr) { return progress; }
34573457
} else {
3458-
phase->C->record_for_post_loop_opts_igvn(this);
3458+
// We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during
3459+
// the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between
3460+
// the stores, and we merge the wrong sequence of stores.
3461+
// Example:
3462+
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
3463+
// Apply MergeStores:
3464+
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
3465+
// Remove more RangeChecks:
3466+
// StoreI [ StoreL ] StoreI
3467+
// But now it would have been better to do this instead:
3468+
// [ StoreL ] [ StoreL ]
3469+
phase->C->record_for_merge_stores_igvn(this);
34593470
}
34603471
}
34613472

src/hotspot/share/opto/node.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,11 @@ Node *Node::clone() const {
508508
// If it is applicable, it will happen anyway when the cloned node is registered with IGVN.
509509
n->remove_flag(Node::NodeFlags::Flag_for_post_loop_opts_igvn);
510510
}
511+
if (for_merge_stores_igvn()) {
512+
// Don't add cloned node to Compile::_for_merge_stores_igvn list automatically.
513+
// If it is applicable, it will happen anyway when the cloned node is registered with IGVN.
514+
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
515+
}
511516
if (n->is_ParsePredicate()) {
512517
C->add_parse_predicate(n->as_ParsePredicate());
513518
}
@@ -615,6 +620,9 @@ void Node::destruct(PhaseValues* phase) {
615620
if (for_post_loop_opts_igvn()) {
616621
compile->remove_from_post_loop_opts_igvn(this);
617622
}
623+
if (for_merge_stores_igvn()) {
624+
compile->remove_from_merge_stores_igvn(this);
625+
}
618626

619627
if (is_SafePoint()) {
620628
as_SafePoint()->delete_replaced_nodes();

src/hotspot/share/opto/node.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,8 +830,9 @@ class Node {
830830
Flag_is_expensive = 1 << 13,
831831
Flag_is_predicated_vector = 1 << 14,
832832
Flag_for_post_loop_opts_igvn = 1 << 15,
833-
Flag_is_removed_by_peephole = 1 << 16,
834-
Flag_is_predicated_using_blend = 1 << 17,
833+
Flag_for_merge_stores_igvn = 1 << 16,
834+
Flag_is_removed_by_peephole = 1 << 17,
835+
Flag_is_predicated_using_blend = 1 << 18,
835836
_last_flag = Flag_is_predicated_using_blend
836837
};
837838

@@ -1075,6 +1076,7 @@ class Node {
10751076
bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }
10761077

10771078
bool for_post_loop_opts_igvn() const { return (_flags & Flag_for_post_loop_opts_igvn) != 0; }
1079+
bool for_merge_stores_igvn() const { return (_flags & Flag_for_merge_stores_igvn) != 0; }
10781080

10791081
// Is 'n' possibly a loop entry (i.e. a Parse Predicate projection)?
10801082
static bool may_be_loop_entry(Node* n) {

src/hotspot/share/opto/phasetype.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
flags(CCP1, "PhaseCCP 1") \
8989
flags(ITER_GVN2, "Iter GVN 2") \
9090
flags(PHASEIDEALLOOP_ITERATIONS, "PhaseIdealLoop iterations") \
91+
flags(AFTER_MERGE_STORES, "After Merge Stores") \
9192
flags(BEFORE_MACRO_EXPANSION , "Before Macro Expansion") \
9293
flags(AFTER_MACRO_EXPANSION_STEP, "After Macro Expansion Step") \
9394
flags(AFTER_MACRO_EXPANSION, "After Macro Expansion") \

test/hotspot/jtreg/compiler/c2/TestMergeStores.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@
4949
* @run main compiler.c2.TestMergeStores unaligned
5050
*/
5151

52+
/*
53+
* @test
54+
* @bug 8318446 8331054 8331311 8335392 8348959 8351414
55+
* @summary Test merging of consecutive stores
56+
* @modules java.base/jdk.internal.misc
57+
* @library /test/lib /
58+
* @run main compiler.c2.TestMergeStores StressIGVN
59+
*/
60+
5261
public class TestMergeStores {
5362
static int RANGE = 1000;
5463
private static final Unsafe UNSAFE = Unsafe.getUnsafe();
@@ -99,6 +108,19 @@ public static void main(String[] args) {
99108
switch (args[0]) {
100109
case "aligned" -> { framework.addFlags("-XX:-UseUnalignedAccesses"); }
101110
case "unaligned" -> { framework.addFlags("-XX:+UseUnalignedAccesses"); }
111+
// StressIGVN can mix up the order of RangeCheck smearing and MergeStores optimization,
112+
// if they run in the same IGVN round. When we did not yet have a dedicated IGVN round
113+
// after post loop opts for MergeStrores, it could happen that we would only remove
114+
// RangeChecks after already merging some stores, and now they would have to be split
115+
// up again and re-merged with different stores. Example:
116+
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
117+
// Apply MergeStores:
118+
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
119+
// Remove more RangeChecks:
120+
// StoreI [ StoreL ] StoreI
121+
// But now it would have been better to do this instead:
122+
// [ StoreL ] [ StoreL ]
123+
case "StressIGVN" -> { framework.addFlags("-XX:+StressIGVN"); }
102124
default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
103125
}
104126
framework.start();

test/hotspot/jtreg/compiler/lib/ir_framework/CompilePhase.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ public enum CompilePhase {
9595
CCP1("PhaseCCP 1"),
9696
ITER_GVN2("Iter GVN 2"),
9797
PHASEIDEALLOOP_ITERATIONS("PhaseIdealLoop iterations"),
98+
AFTER_MERGE_STORES("After Merge Stores"),
9899
BEFORE_MACRO_EXPANSION("Before Macro Expansion"),
99100
AFTER_MACRO_EXPANSION_STEP("After Macro Expansion Step"),
100101
AFTER_MACRO_EXPANSION("After Macro Expansion"),

0 commit comments

Comments
 (0)