8314307: Improve loop handling

Reviewed-by: mbalao, fferrari, andrew Backport-of: 62ac93d145ca9fa1ab0c040533c62c42c202703a
2025-12-10 10:44:16 -06:00 · 2023-11-07 11:08:30 +00:00 · 2023-11-07 11:08:30 +00:00 · f7cb28de01
commit f7cb28de01
parent 77d38b78e4
4 changed files with 402 additions and 85 deletions
--- a/hotspot/src/share/vm/opto/ifnode.cpp
+++ b/hotspot/src/share/vm/opto/ifnode.cpp
@ -882,6 +882,46 @@ Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    // then we are guaranteed to fail, so just start interpreting there.
    // We 'expand' the top 3 range checks to include all post-dominating
    // checks.
+    //
+    // Example:
+    // a[i+x] // (1) 1 < x < 6
+    // a[i+3] // (2)
+    // a[i+4] // (3)
+    // a[i+6] // max = max of all constants
+    // a[i+2]
+    // a[i+1] // min = min of all constants
+    //
+    // If x < 3:
+    //   (1) a[i+x]: Leave unchanged
+    //   (2) a[i+3]: Replace with a[i+max] = a[i+6]: i+x < i+3 <= i+6  -> (2) is covered
+    //   (3) a[i+4]: Replace with a[i+min] = a[i+1]: i+1 < i+4 <= i+6  -> (3) and all following checks are covered
+    //   Remove all other a[i+c] checks
+    //
+    // If x >= 3:
+    //   (1) a[i+x]: Leave unchanged
+    //   (2) a[i+3]: Replace with a[i+min] = a[i+1]: i+1 < i+3 <= i+x  -> (2) is covered
+    //   (3) a[i+4]: Replace with a[i+max] = a[i+6]: i+1 < i+4 <= i+6  -> (3) and all following checks are covered
+    //   Remove all other a[i+c] checks
+    //
+    // We only need the top 2 range checks if x is the min or max of all constants.
+    //
+    // This, however, only works if the interval [i+min,i+max] is not larger than max_int (i.e. abs(max - min) < max_int):
+    // The theoretical max size of an array is max_int with:
+    // - Valid index space: [0,max_int-1]
+    // - Invalid index space: [max_int,-1] // max_int, min_int, min_int - 1 ..., -1
+    //
+    // The size of the consecutive valid index space is smaller than the size of the consecutive invalid index space.
+    // If we choose min and max in such a way that:
+    // - abs(max - min) < max_int
+    // - i+max and i+min are inside the valid index space
+    // then all indices [i+min,i+max] must be in the valid index space. Otherwise, the invalid index space must be
+    // smaller than the valid index space which is never the case for any array size.
+    //
+    // Choosing a smaller array size only makes the valid index space smaller and the invalid index space larger and
+    // the argument above still holds.
+    //
+    // Note that the same optimization with the same maximal accepted interval size can also be found in C1.
+    const jlong maximum_number_of_min_max_interval_indices = (jlong)max_jint;

    // The top 3 range checks seen
    const int NRC =3;
@ -915,13 +955,18 @@ Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
            found_immediate_dominator = true;
            break;
          }
-          // Gather expanded bounds
-          off_lo = MIN2(off_lo,offset2);
-          off_hi = MAX2(off_hi,offset2);
-          // Record top NRC range checks
-          prev_checks[nb_checks%NRC].ctl = prev_dom;
-          prev_checks[nb_checks%NRC].off = offset2;
-          nb_checks++;
+
+          // "x - y" -> must add one to the difference for number of elements in [x,y]
+          const jlong diff = (jlong)MIN2(offset2, off_lo) - (jlong)MAX2(offset2, off_hi);
+          if (ABS(diff) < maximum_number_of_min_max_interval_indices) {
+            // Gather expanded bounds
+            off_lo = MIN2(off_lo, offset2);
+            off_hi = MAX2(off_hi, offset2);
+            // Record top NRC range checks
+            prev_checks[nb_checks % NRC].ctl = prev_dom;
+            prev_checks[nb_checks % NRC].off = offset2;
+            nb_checks++;
+          }
        }
      }
      prev_dom = dom;
--- a/hotspot/src/share/vm/opto/loopnode.cpp
+++ b/hotspot/src/share/vm/opto/loopnode.cpp
@ -260,6 +260,49 @@ void PhaseIdealLoop::set_subtree_ctrl( Node *n ) {
  set_early_ctrl( n );
 }

+void PhaseIdealLoop::insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol) {
+  Node* new_predicate_proj = create_new_if_for_predicate(limit_check_proj, NULL,
+                                                         Deoptimization::Reason_loop_limit_check);
+  Node* iff = new_predicate_proj->in(0);
+  assert(iff->Opcode() == Op_If, "bad graph shape");
+  Node* conv = iff->in(1);
+  assert(conv->Opcode() == Op_Conv2B, "bad graph shape");
+  Node* opaq = conv->in(1);
+  assert(opaq->Opcode() == Op_Opaque1, "bad graph shape");
+  cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit);
+  bol = _igvn.register_new_node_with_optimizer(bol);
+  set_subtree_ctrl(bol);
+  _igvn.replace_input_of(iff, 1, bol);
+
+#ifndef PRODUCT
+  // report that the loop predication has been actually performed
+  // for this loop
+  if (TraceLoopLimitCheck) {
+    tty->print_cr("Counted Loop Limit Check generated:");
+    debug_only( bol->dump(2); )
+  }
+#endif
+}
+
+static int check_stride_overflow(jlong final_correction, const TypeInt* limit_t) {
+  if (final_correction > 0) {
+    if (limit_t->_lo > (max_jint - final_correction)) {
+      return -1;
+    }
+    if (limit_t->_hi > (max_jint - final_correction)) {
+      return 1;
+    }
+  } else {
+    if (limit_t->_hi < (min_jint - final_correction)) {
+      return -1;
+    }
+    if (limit_t->_lo < (min_jint - final_correction)) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
 //------------------------------is_counted_loop--------------------------------
 bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  PhaseGVN *gvn = &_igvn;
@ -463,51 +506,256 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  assert(x->Opcode() == Op_Loop, "regular loops only");
  C->print_method(PHASE_BEFORE_CLOOPS, 3);

-  Node *hook = new (C) Node(6);
+  Node* adjusted_limit = limit;

  if (LoopLimitCheck) {

  // ===================================================
-  // Generate loop limit check to avoid integer overflow
-  // in cases like next (cyclic loops):
+  // We can only convert this loop to a counted loop if we can guarantee that the iv phi will never overflow at runtime.
+  // This is an implicit assumption taken by some loop optimizations. We therefore must ensure this property at all cost.
+  // At this point, we've already excluded some trivial cases where an overflow could have been proven statically.
+  // But even though we cannot prove that an overflow will *not* happen, we still want to speculatively convert this loop
+  // to a counted loop. This can be achieved by adding additional iv phi overflow checks before the loop. If they fail,
+  // we trap and resume execution before the loop without having executed any iteration of the loop, yet.
  //
-  // for (i=0; i <= max_jint; i++) {}
-  // for (i=0; i <  max_jint; i+=2) {}
+  // These additional iv phi overflow checks can be inserted as Loop Limit Check Predicates above the Loop Limit Check
+  // Parse Predicate which captures a JVM state just before the entry of the loop. If there is no such Parse Predicate,
+  // we cannot generate a Loop Limit Check Predicate and thus cannot speculatively convert the loop to a counted loop.
+  //
+  // In the following, we only focus on int loops with stride > 0 to keep things simple. The argumentation and proof
+  // for stride < 0 is analogously. For long loops, we would replace max_int with max_long.
  //
  //
-  // Limit check predicate depends on the loop test:
+  // The loop to be converted does not always need to have the often used shape:
  //
-  // for(;i != limit; i++)       --> limit <= (max_jint)
-  // for(;i <  limit; i+=stride) --> limit <= (max_jint - stride + 1)
-  // for(;i <= limit; i+=stride) --> limit <= (max_jint - stride    )
+  //                                                 i = init
+  //     i = init                                loop:
+  //     do {                                        ...
+  //         // ...               equivalent         i+=stride
+  //         i+=stride               <==>            if (i < limit)
+  //     } while (i < limit);                          goto loop
+  //                                             exit:
+  //                                                 ...
  //
+  // where the loop exit check uses the post-incremented iv phi and a '<'-operator.
+  //
+  // We could also have '<='-operator (or '>='-operator for negative strides) or use the pre-incremented iv phi value
+  // in the loop exit check:
+  //
+  //         i = init
+  //     loop:
+  //         ...
+  //         if (i <= limit)
+  //             i+=stride
+  //             goto loop
+  //     exit:
+  //         ...
+  //
+  // Let's define the following terms:
+  // - iv_pre_i: The pre-incremented iv phi before the i-th iteration.
+  // - iv_post_i: The post-incremented iv phi after the i-th iteration.
+  //
+  // The iv_pre_i and iv_post_i have the following relation:
+  //      iv_pre_i + stride = iv_post_i
+  //
+  // When converting a loop to a counted loop, we want to have a canonicalized loop exit check of the form:
+  //     iv_post_i < adjusted_limit
+  //
+  // If that is not the case, we need to canonicalize the loop exit check by using different values for adjusted_limit:
+  // (LE1) iv_post_i < limit: Already canonicalized. We can directly use limit as adjusted_limit.
+  //           -> adjusted_limit = limit.
+  // (LE2) iv_post_i <= limit:
+  //           iv_post_i < limit + 1
+  //           -> adjusted limit = limit + 1
+  // (LE3) iv_pre_i < limit:
+  //           iv_pre_i + stride < limit + stride
+  //           iv_post_i < limit + stride
+  //           -> adjusted_limit = limit + stride
+  // (LE4) iv_pre_i <= limit:
+  //           iv_pre_i < limit + 1
+  //           iv_pre_i + stride < limit + stride + 1
+  //           iv_post_i < limit + stride + 1
+  //           -> adjusted_limit = limit + stride + 1
+  //
+  // Note that:
+  //     (AL) limit <= adjusted_limit.
+  //
+  // The following loop invariant has to hold for counted loops with n iterations (i.e. loop exit check true after n-th
+  // loop iteration) and a canonicalized loop exit check to guarantee that no iv_post_i over- or underflows:
+  // (INV) For i = 1..n, min_int <= iv_post_i <= max_int
+  //
+  // To prove (INV), we require the following two conditions/assumptions:
+  // (i): adjusted_limit - 1 + stride <= max_int
+  // (ii): init < limit
+  //
+  // If we can prove (INV), we know that there can be no over- or underflow of any iv phi value. We prove (INV) by
+  // induction by assuming (i) and (ii).
+  //
+  // Proof by Induction
+  // ------------------
+  // > Base case (i = 1): We show that (INV) holds after the first iteration:
+  //     min_int <= iv_post_1 = init + stride <= max_int
+  // Proof:
+  //     First, we note that (ii) implies
+  //         (iii) init <= limit - 1
+  //     max_int >= adjusted_limit - 1 + stride   [using (i)]
+  //             >= limit - 1 + stride            [using (AL)]
+  //             >= init + stride                 [using (iii)]
+  //             >= min_int                       [using stride > 0, no underflow]
+  // Thus, no overflow happens after the first iteration and (INV) holds for i = 1.
+  //
+  // Note that to prove the base case we need (i) and (ii).
+  //
+  // > Induction Hypothesis (i = j, j > 1): Assume that (INV) holds after the j-th iteration:
+  //     min_int <= iv_post_j <= max_int
+  // > Step case (i = j + 1): We show that (INV) also holds after the j+1-th iteration:
+  //     min_int <= iv_post_{j+1} = iv_post_j + stride <= max_int
+  // Proof:
+  // If iv_post_j >= adjusted_limit:
+  //     We exit the loop after the j-th iteration, and we don't execute the j+1-th iteration anymore. Thus, there is
+  //     also no iv_{j+1}. Since (INV) holds for iv_j, there is nothing left to prove.
+  // If iv_post_j < adjusted_limit:
+  //     First, we note that:
+  //         (iv) iv_post_j <= adjusted_limit - 1
+  //     max_int >= adjusted_limit - 1 + stride    [using (i)]
+  //             >= iv_post_j + stride             [using (iv)]
+  //             >= min_int                        [using stride > 0, no underflow]
+  //
+  // Note that to prove the step case we only need (i).
+  //
+  // Thus, by assuming (i) and (ii), we proved (INV).
+  //
+  //
+  // It is therefore enough to add the following two Loop Limit Check Predicates to check assumptions (i) and (ii):
+  //
+  // (1) Loop Limit Check Predicate for (i):
+  //     Using (i): adjusted_limit - 1 + stride <= max_int
+  //
+  //     This condition is now restated to use limit instead of adjusted_limit:
+  //
+  //     To prevent an overflow of adjusted_limit -1 + stride itself, we rewrite this check to
+  //         max_int - stride + 1 >= adjusted_limit
+  //     We can merge the two constants into
+  //         canonicalized_correction = stride - 1
+  //     which gives us
+  //        max_int - canonicalized_correction >= adjusted_limit
+  //
+  //     To directly use limit instead of adjusted_limit in the predicate condition, we split adjusted_limit into:
+  //         adjusted_limit = limit + limit_correction
+  //     Since stride > 0 and limit_correction <= stride + 1, we can restate this with no over- or underflow into:
+  //         max_int - canonicalized_correction - limit_correction >= limit
+  //     Since canonicalized_correction and limit_correction are both constants, we can replace them with a new constant:
+  //         final_correction = canonicalized_correction + limit_correction
+  //     which gives us:
+  //
+  //     Final predicate condition:
+  //         max_int - final_correction >= limit
+  //
+  // (2) Loop Limit Check Predicate for (ii):
+  //     Using (ii): init < limit
+  //
+  //     This Loop Limit Check Predicate is not required if we can prove at compile time that either:
+  //        (2.1) type(init) < type(limit)
+  //             In this case, we know:
+  //                 all possible values of init < all possible values of limit
+  //             and we can skip the predicate.
+  //
+  //        (2.2) init < limit is already checked before (i.e. found as a dominating check)
+  //            In this case, we do not need to re-check the condition and can skip the predicate.
+  //            This is often found for while- and for-loops which have the following shape:
+  //
+  //                if (init < limit) { // Dominating test. Do not need the Loop Limit Check Predicate below.
+  //                    i = init;
+  //                    if (init >= limit) { trap(); } // Here we would insert the Loop Limit Check Predicate
+  //                    do {
+  //                        i += stride;
+  //                    } while (i < limit);
+  //                }
+  //
+  //        (2.3) init + stride <= max_int
+  //            In this case, there is no overflow of the iv phi after the first loop iteration.
+  //            In the proof of the base case above we showed that init + stride <= max_int by using assumption (ii):
+  //                init < limit
+  //            In the proof of the step case above, we did not need (ii) anymore. Therefore, if we already know at
+  //            compile time that init + stride <= max_int then we have trivially proven the base case and that
+  //            there is no overflow of the iv phi after the first iteration. In this case, we don't need to check (ii)
+  //            again and can skip the predicate.

-  // Check if limit is excluded to do more precise int overflow check.
-  bool incl_limit = (bt == BoolTest::le || bt == BoolTest::ge);
-  int stride_m  = stride_con - (incl_limit ? 0 : (stride_con > 0 ? 1 : -1));

-  // If compare points directly to the phi we need to adjust
-  // the compare so that it points to the incr. Limit have
-  // to be adjusted to keep trip count the same and the
-  // adjusted limit should be checked for int overflow.
-  if (phi_incr != NULL) {
-    stride_m  += stride_con;
-  }
+  // Accounting for (LE3) and (LE4) where we use pre-incremented phis in the loop exit check.
+  const jlong limit_correction_for_pre_iv_exit_check = (phi_incr != NULL) ? stride_con : 0;

-  if (limit->is_Con()) {
-    int limit_con = limit->get_int();
-    if ((stride_con > 0 && limit_con > (max_jint - stride_m)) ||
-        (stride_con < 0 && limit_con < (min_jint - stride_m))) {
-      // Bailout: it could be integer overflow.
+  // Accounting for (LE2) and (LE4) where we use <= or >= in the loop exit check.
+  const bool includes_limit = (bt == BoolTest::le || bt == BoolTest::ge);
+  const jlong limit_correction_for_le_ge_exit_check = (includes_limit ? (stride_con > 0 ? 1 : -1) : 0);
+
+  const jlong limit_correction = limit_correction_for_pre_iv_exit_check + limit_correction_for_le_ge_exit_check;
+  const jlong canonicalized_correction = stride_con + (stride_con > 0 ? -1 : 1);
+  const jlong final_correction = canonicalized_correction + limit_correction;
+
+  int sov = check_stride_overflow(final_correction, limit_t);
+
+  // If sov==0, limit's type always satisfies the condition, for
+  // example, when it is an array length.
+  if (sov != 0) {
+    if (sov < 0) {
+      return false;  // Bailout: integer overflow is certain.
+    }
+    // (1) Loop Limit Check Predicate is required because we could not statically prove that
+    //     limit + final_correction = adjusted_limit - 1 + stride <= max_int
+    ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
+    if (!limit_check_proj) {
+      // The Loop Limit Check Parse Predicate is not generated if this method trapped here before.
+#ifdef ASSERT
+      if (TraceLoopLimitCheck) {
+        tty->print("missing loop limit check:");
+        loop->dump_head();
+        x->dump(1);
+      }
+#endif
      return false;
    }
-  } else if ((stride_con > 0 && limit_t->_hi <= (max_jint - stride_m)) ||
-             (stride_con < 0 && limit_t->_lo >= (min_jint - stride_m))) {
-      // Limit's type may satisfy the condition, for example,
-      // when it is an array length.
-  } else {
-    // Generate loop's limit check.
-    // Loop limit check predicate should be near the loop.
+
+    IfNode* check_iff = limit_check_proj->in(0)->as_If();
+
+    if (!is_dominator(get_ctrl(limit), check_iff->in(0))) {
+      return false;
+    }
+
+    Node* cmp_limit;
+    Node* bol;
+
+    if (stride_con > 0) {
+      cmp_limit = new (C) CmpINode(limit, _igvn.intcon(max_jint - final_correction));
+      bol = new (C) BoolNode(cmp_limit, BoolTest::le);
+    } else {
+      cmp_limit = new (C) CmpINode(limit, _igvn.intcon(min_jint - final_correction));
+      bol = new (C) BoolNode(cmp_limit, BoolTest::ge);
+    }
+
+    insert_loop_limit_check(limit_check_proj, cmp_limit, bol);
+  }
+
+  // (2.3)
+  const bool init_plus_stride_could_overflow =
+          (stride_con > 0 && init_t->_hi > max_jint - stride_con) ||
+          (stride_con < 0 && init_t->_lo < min_jint - stride_con);
+  // (2.1)
+  const bool init_gte_limit = (stride_con > 0 && init_t->_hi >= limit_t->_lo) ||
+                              (stride_con < 0 && init_t->_lo <= limit_t->_hi);
+
+  if (init_gte_limit && // (2.1)
+     ((bt == BoolTest::ne || init_plus_stride_could_overflow) && // (2.3)
+      !has_dominating_loop_limit_check(init_trip, limit, stride_con, init_control))) { // (2.2)
+    // (2) Iteration Loop Limit Check Predicate is required because neither (2.1), (2.2), nor (2.3) holds.
+    // We use the following condition:
+    // - stride > 0: init < limit
+    // - stride < 0: init > limit
+    //
+    // This predicate is always required if we have a non-equal-operator in the loop exit check (where stride = 1 is
+    // a requirement). We transform the loop exit check by using a less-than-operator. By doing so, we must always
+    // check that init < limit. Otherwise, we could have a different number of iterations at runtime.
+
    ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
    if (!limit_check_proj) {
      // The limit check predicate is not generated if this method trapped here before.
@ -520,41 +768,38 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
 #endif
      return false;
    }
-
    IfNode* check_iff = limit_check_proj->in(0)->as_If();
+
+    if (!is_dominator(get_ctrl(limit), check_iff->in(0)) ||
+        !is_dominator(get_ctrl(init_trip), check_iff->in(0))) {
+      return false;
+    }
+
    Node* cmp_limit;
    Node* bol;

    if (stride_con > 0) {
-      cmp_limit = new (C) CmpINode(limit, _igvn.intcon(max_jint - stride_m));
-      bol = new (C) BoolNode(cmp_limit, BoolTest::le);
+      cmp_limit = new (C) CmpINode(init_trip, limit);
+      bol = new (C) BoolNode(cmp_limit, BoolTest::lt);
    } else {
-      cmp_limit = new (C) CmpINode(limit, _igvn.intcon(min_jint - stride_m));
-      bol = new (C) BoolNode(cmp_limit, BoolTest::ge);
+      cmp_limit = new (C) CmpINode(init_trip, limit);
+      bol = new (C) BoolNode(cmp_limit, BoolTest::gt);
    }
-    cmp_limit = _igvn.register_new_node_with_optimizer(cmp_limit);
-    bol = _igvn.register_new_node_with_optimizer(bol);
-    set_subtree_ctrl(bol);

-    // Replace condition in original predicate but preserve Opaque node
-    // so that previous predicates could be found.
-    assert(check_iff->in(1)->Opcode() == Op_Conv2B &&
-           check_iff->in(1)->in(1)->Opcode() == Op_Opaque1, "");
-    Node* opq = check_iff->in(1)->in(1);
-    _igvn.hash_delete(opq);
-    opq->set_req(1, bol);
-    // Update ctrl.
-    set_ctrl(opq, check_iff->in(0));
-    set_ctrl(check_iff->in(1), check_iff->in(0));
+    insert_loop_limit_check(limit_check_proj, cmp_limit, bol);
+  }

-#ifndef PRODUCT
-    // report that the loop predication has been actually performed
-    // for this loop
-    if (TraceLoopLimitCheck) {
-      tty->print_cr("Counted Loop Limit Check generated:");
-      debug_only( bol->dump(2); )
+  if (bt == BoolTest::ne) {
+    // Now we need to canonicalize the loop condition if it is 'ne'.
+    assert(stride_con == 1 || stride_con == -1, "simple increment only - checked before");
+    if (stride_con > 0) {
+      // 'ne' can be replaced with 'lt' only when init < limit. This is ensured by the inserted predicate above.
+      bt = BoolTest::lt;
+    } else {
+      assert(stride_con < 0, "must be");
+      // 'ne' can be replaced with 'gt' only when init > limit. This is ensured by the inserted predicate above.
+      bt = BoolTest::gt;
    }
-#endif
  }

  if (phi_incr != NULL) {
@ -567,26 +812,15 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
    // is converted to
    //   i = init; do {} while(++i < limit+1);
    //
-    limit = gvn->transform(new (C) AddINode(limit, stride));
+    adjusted_limit = gvn->transform(new (C) AddINode(limit, stride));
  }

-  // Now we need to canonicalize loop condition.
-  if (bt == BoolTest::ne) {
-    assert(stride_con == 1 || stride_con == -1, "simple increment only");
-    // 'ne' can be replaced with 'lt' only when init < limit.
-    if (stride_con > 0 && init_t->_hi < limit_t->_lo)
-      bt = BoolTest::lt;
-    // 'ne' can be replaced with 'gt' only when init > limit.
-    if (stride_con < 0 && init_t->_lo > limit_t->_hi)
-      bt = BoolTest::gt;
-  }
-
-  if (incl_limit) {
+  if (includes_limit) {
    // The limit check guaranties that 'limit <= (max_jint - stride)' so
    // we can convert 'i <= limit' to 'i < limit+1' since stride != 0.
    //
    Node* one = (stride_con > 0) ? gvn->intcon( 1) : gvn->intcon(-1);
-    limit = gvn->transform(new (C) AddINode(limit, one));
+    adjusted_limit = gvn->transform(new (C) AddINode(adjusted_limit, one));
    if (bt == BoolTest::le)
      bt = BoolTest::lt;
    else if (bt == BoolTest::ge)
@ -594,10 +828,11 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
    else
      ShouldNotReachHere();
  }
-  set_subtree_ctrl( limit );
+  set_subtree_ctrl(adjusted_limit);

  } else { // LoopLimitCheck

+  Node *hook = new (C) Node(6);
  // If compare points to incr, we are ok.  Otherwise the compare
  // can directly point to the phi; in this case adjust the compare so that
  // it points to the incr by adjusting the limit.
@ -691,6 +926,11 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  limit = gvn->transform(new (C) AddINode(span,init_trip));
  set_subtree_ctrl( limit );

+  adjusted_limit = limit;
+
+  // Free up intermediate goo
+  _igvn.remove_dead_node(hook);
+
  } // LoopLimitCheck

  if (!UseCountedLoopSafepoints) {
@ -728,7 +968,7 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  }
  cmp = cmp->clone();
  cmp->set_req(1,incr);
-  cmp->set_req(2,limit);
+  cmp->set_req(2, adjusted_limit);
  cmp = _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, iff->in(0));

@ -802,9 +1042,6 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
    }
  }

-  // Free up intermediate goo
-  _igvn.remove_dead_node(hook);
-
 #ifdef ASSERT
  assert(l->is_valid_counted_loop(), "counted loop shape is messed up");
  assert(l == loop->_head && l->phi() == phi && l->loopexit() == lex, "" );
@ -821,6 +1058,37 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  return true;
 }

+// Check if there is a dominating loop limit check of the form 'init < limit' starting at the loop entry.
+// If there is one, then we do not need to create an additional Loop Limit Check Predicate.
+bool PhaseIdealLoop::has_dominating_loop_limit_check(Node* init_trip, Node* limit, const int stride_con,
+                                                     Node* loop_entry) {
+  // Eagerly call transform() on the Cmp and Bool node to common them up if possible. This is required in order to
+  // successfully find a dominated test with the If node below.
+  Node* cmp_limit;
+  Node* bol;
+  if (stride_con > 0) {
+    cmp_limit = _igvn.transform(new (C) CmpINode(init_trip, limit));
+    bol = _igvn.transform(new (C) BoolNode(cmp_limit, BoolTest::lt));
+  } else {
+    cmp_limit = _igvn.transform(new (C) CmpINode(init_trip, limit));
+    bol = _igvn.transform(new (C) BoolNode(cmp_limit, BoolTest::gt));
+  }
+
+  // Check if there is already a dominating init < limit check. If so, we do not need a Loop Limit Check Predicate.
+  IfNode* iff = new (C) IfNode(loop_entry, bol, PROB_MIN, COUNT_UNKNOWN);
+  // Also add fake IfProj nodes in order to call transform() on the newly created IfNode.
+  IfFalseNode* if_false = new (C) IfFalseNode(iff);
+  IfTrueNode* if_true = new (C) IfTrueNode(iff);
+  Node* dominated_iff = _igvn.transform(iff);
+  // ConI node? Found dominating test (IfNode::dominated_by() returns a ConI node).
+  const bool found_dominating_test = dominated_iff != NULL && dominated_iff->Opcode() == Op_ConI;
+
+  // Kill the If with its projections again in the next IGVN round by cutting it off from the graph.
+  _igvn.replace_input_of(iff, 0, C->top());
+  _igvn.replace_input_of(iff, 1, C->top());
+  return found_dominating_test;
+}
+
 //----------------------exact_limit-------------------------------------------
 Node* PhaseIdealLoop::exact_limit( IdealLoopTree *loop ) {
  assert(loop->_head->is_CountedLoop(), "");
--- a/hotspot/src/share/vm/opto/loopnode.hpp
+++ b/hotspot/src/share/vm/opto/loopnode.hpp
@ -896,6 +896,10 @@ public:
  // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
  ProjNode* create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
                                        Deoptimization::DeoptReason reason);
+  void insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol);
+  bool has_dominating_loop_limit_check(Node* init_trip, Node* limit, int stride_con,
+                                       Node* loop_entry);
+
  void register_control(Node* n, IdealLoopTree *loop, Node* pred);

  // Clone loop predicates to cloned loops (peeled, unswitched)
--- a/hotspot/src/share/vm/opto/phaseX.hpp
+++ b/hotspot/src/share/vm/opto/phaseX.hpp
@ -433,9 +433,6 @@ private:

 protected:

-  // Idealize new Node 'n' with respect to its inputs and its value
-  virtual Node *transform( Node *a_node );
-
  // Warm up hash table, type table and initial worklist
  void init_worklist( Node *a_root );

@ -449,6 +446,9 @@ public:
  PhaseIterGVN( PhaseGVN *gvn ); // Used after Parser
  PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ); // Used after +VerifyOpto

+  // Idealize new Node 'n' with respect to its inputs and its value
+  virtual Node *transform( Node *a_node );
+
  virtual PhaseIterGVN *is_IterGVN() { return this; }

  Unique_Node_List _worklist;       // Iterative worklist