Skip to content

Commit 52fdf1e

Browse files
Artemiy VolkovJeffreyALaw
Artemiy Volkov
authored and committed
tree-optimization/116024 - simplify some cases of X +- C1 cmp C2
Whenever C1 and C2 are integer constants, X is of a wrapping type, and cmp is a relational operator, the expression X +- C1 cmp C2 can be simplified in the following cases: (a) If cmp is <= and C2 -+ C1 == +INF(1), we can transform the initial comparison in the following way: X +- C1 <= C2 -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1) -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions) -INF -+ C1 <= X <= +INF (due to (1)) -INF -+ C1 <= X (eliminate the right hand side since it holds for any X) (b) By analogy, if cmp is >= and C2 -+ C1 == -INF(1), use the following sequence of transformations: X +- C1 >= C2 +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1) +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions) +INF -+ C1 >= X >= -INF (due to (1)) +INF -+ C1 >= X (eliminate the right hand side since it holds for any X) (c) The > and < cases are negations of (a) and (b), respectively. This transformation occasionally allows saving add / sub instructions, for instance the expression 3 + (uint32_t)f() < 2 compiles to cmn w0, #4 cset w0, ls instead of add w0, w0, 3 cmp w0, 2 cset w0, ls on aarch64. Testcases that go together with this patch have been split into two separate files, one containing testcases for unsigned variables and the other for wrapping signed ones (and thus compiled with -fwrapv). Additionally, one aarch64 test has been adjusted since the patch has caused the generated code to change from cmn w0, #2 csinc w0, w1, wzr, cc (x < -2) to cmn w0, #3 csinc w0, w1, wzr, cs (x <= -3) This patch has been bootstrapped and regtested on aarch64, x86_64, and i386, and additionally regtested on riscv32. gcc/ChangeLog: PR tree-optimization/116024 * match.pd: New transformation around integer comparison. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr116024-2.c: New test. * gcc.dg/tree-ssa/pr116024-2-fwrapv.c: Ditto. * gcc.target/aarch64/gtu_to_ltu_cmp_1.c: Adjust.
1 parent e5f5cff commit 52fdf1e

File tree

4 files changed

+118
-2
lines changed

4 files changed

+118
-2
lines changed

gcc/match.pd

+42-1
Original file line numberDiff line numberDiff line change
@@ -9030,6 +9030,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
90309030
(cmp @0 { TREE_OVERFLOW (res)
90319031
? drop_tree_overflow (res) : res; }))))))))
90329032
(for cmp (lt le gt ge)
9033+
rcmp (gt ge lt le)
90339034
(for op (plus minus)
90349035
rop (minus plus)
90359036
(simplify
@@ -9057,7 +9058,47 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
90579058
"X cmp C2 -+ C1"),
90589059
WARN_STRICT_OVERFLOW_COMPARISON);
90599060
}
9060-
(cmp @0 { res; })))))))))
9061+
(cmp @0 { res; })))))
9062+
/* For wrapping types, simplify the following cases of X +- C1 CMP C2:
9063+
9064+
(a) If CMP is <= and C2 -+ C1 == +INF (1), simplify to X >= -INF -+ C1
9065+
by observing the following:
9066+
9067+
X +- C1 <= C2
9068+
==> -INF <= X +- C1 <= C2 (add left hand side which holds for any X, C1)
9069+
==> -INF -+ C1 <= X <= C2 -+ C1 (add -+C1 to all 3 expressions)
9070+
==> -INF -+ C1 <= X <= +INF (due to (1))
9071+
==> -INF -+ C1 <= X (eliminate the right hand side since it holds for any X)
9072+
9073+
(b) Similarly, if CMP is >= and C2 -+ C1 == -INF (1):
9074+
9075+
X +- C1 >= C2
9076+
==> +INF >= X +- C1 >= C2 (add left hand side which holds for any X, C1)
9077+
==> +INF -+ C1 >= X >= C2 -+ C1 (add -+C1 to all 3 expressions)
9078+
==> +INF -+ C1 >= X >= -INF (due to (1))
9079+
==> +INF -+ C1 >= X (eliminate the right hand side since it holds for any X)
9080+
9081+
(c) The > and < cases are negations of (a) and (b), respectively. */
9082+
(if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
9083+
(with
9084+
{
9085+
wide_int max = wi::max_value (TREE_TYPE (@0));
9086+
wide_int min = wi::min_value (TREE_TYPE (@0));
9087+
9088+
wide_int c2 = rop == PLUS_EXPR
9089+
? wi::add (wi::to_wide (@2), wi::to_wide (@1))
9090+
: wi::sub (wi::to_wide (@2), wi::to_wide (@1));
9091+
}
9092+
(if (((cmp == LE_EXPR || cmp == GT_EXPR) && wi::eq_p (c2, max))
9093+
|| ((cmp == LT_EXPR || cmp == GE_EXPR) && wi::eq_p (c2, min)))
9094+
(with
9095+
{
9096+
wide_int c1 = rop == PLUS_EXPR
9097+
? wi::add (wi::bit_not (c2), wi::to_wide (@1))
9098+
: wi::sub (wi::bit_not (c2), wi::to_wide (@1));
9099+
tree c1_cst = wide_int_to_tree (TREE_TYPE (@0), c1);
9100+
}
9101+
(rcmp @0 { c1_cst; })))))))))
90619102

90629103
/* Invert sign of X in comparisons of the form C1 - X CMP C2. */
90639104

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/* PR tree-optimization/116024 */
2+
/* { dg-do compile } */
3+
/* { dg-options "-O1 -fdump-tree-forwprop1-details -fwrapv" } */
4+
5+
#include <stdint.h>
6+
#include <limits.h>
7+
8+
uint32_t f(void);
9+
10+
int32_t i3(void)
11+
{
12+
int32_t l = -10 + (int32_t)f();
13+
return l <= INT32_MAX - 10; // f() >= INT32_MIN + 10
14+
}
15+
16+
int32_t i3a(void)
17+
{
18+
int32_t l = -20 + (int32_t)f();
19+
return l < INT32_MAX - 19; // f() > INT32_MIN + 19
20+
}
21+
22+
int32_t i3b(void)
23+
{
24+
int32_t l = 30 + (int32_t)f();
25+
return l >= INT32_MIN + 30; // f() <= INT32_MAX - 30
26+
}
27+
28+
int32_t i3c(void)
29+
{
30+
int32_t l = 40 + (int32_t)f();
31+
return l > INT32_MIN + 39; // f() < INT32_MAX - 39
32+
}
33+
34+
/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 "forwprop1" } } */
35+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483638" 1 "forwprop1" } } */
36+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* >= -2147483628" 1 "forwprop1" } } */
37+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483617" 1 "forwprop1" } } */
38+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 2147483607" 1 "forwprop1" } } */
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/* PR tree-optimization/116024 */
2+
/* { dg-do compile } */
3+
/* { dg-options "-O1 -fdump-tree-forwprop1-details" } */
4+
5+
#include <stdint.h>
6+
7+
uint32_t f(void);
8+
9+
int32_t i3(void)
10+
{
11+
uint32_t l = 10 + (uint32_t)f();
12+
return l <= 9; // f() >= -10u
13+
}
14+
15+
int32_t i3a(void)
16+
{
17+
uint32_t l = 20 + (uint32_t)f();
18+
return l < 20; // f() > -21u
19+
}
20+
21+
int32_t i3b(void)
22+
{
23+
uint32_t l = 30 + (uint32_t)f();
24+
return l >= 30; // f() <= -31u
25+
}
26+
27+
int32_t i3c(void)
28+
{
29+
uint32_t l = 40 + (uint32_t)f();
30+
return l > 39; // f() < -40u
31+
}
32+
33+
/* { dg-final { scan-tree-dump-times "Removing dead stmt:.*? \\+" 4 "forwprop1" } } */
34+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967285" 1 "forwprop1" } } */
35+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* > 4294967275" 1 "forwprop1" } } */
36+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967265" 1 "forwprop1" } } */
37+
/* { dg-final { scan-tree-dump-times "gimple_simplified to.* <= 4294967255" 1 "forwprop1" } } */

gcc/testsuite/gcc.target/aarch64/gtu_to_ltu_cmp_1.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ f1 (int x, int t)
1010
return t;
1111
}
1212

13-
/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #2" 1 } } */
13+
/* { dg-final { scan-assembler-times "cmn\\tw\[0-9\]+, #3" 1 } } */

0 commit comments

Comments
 (0)