bd52d17906
too large to list, but see: http://gcc.gnu.org/gcc-3.4/changes.html http://gcc.gnu.org/gcc-4.0/changes.html http://gcc.gnu.org/gcc-4.1/changes.html for the details.
642 lines
17 KiB
C
642 lines
17 KiB
C
/* Reassociation for trees.
|
|
Copyright (C) 2005 Free Software Foundation, Inc.
|
|
Contributed by Daniel Berlin <dan@dberlin.org>
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING. If not, write to
|
|
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
|
|
Boston, MA 02110-1301, USA. */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "tm.h"
|
|
#include "errors.h"
|
|
#include "ggc.h"
|
|
#include "tree.h"
|
|
#include "basic-block.h"
|
|
#include "diagnostic.h"
|
|
#include "tree-inline.h"
|
|
#include "tree-flow.h"
|
|
#include "tree-gimple.h"
|
|
#include "tree-dump.h"
|
|
#include "timevar.h"
|
|
#include "hashtab.h"
|
|
#include "tree-iterator.h"
|
|
#include "tree-pass.h"
|
|
|
|
/* This is a simple global reassociation pass that uses a combination
|
|
of heuristics and a hashtable to try to expose more operations to
|
|
CSE.
|
|
|
|
The basic idea behind the heuristic is to rank expressions by
|
|
depth of the computation tree and loop depth, and try to produce
|
|
expressions consisting of small rank operations, as they are more
|
|
likely to reoccur. In addition, we use a hashtable to try to see
|
|
if we can transpose an operation into something we have seen
|
|
before.
|
|
|
|
Note that the way the hashtable is structured will sometimes find
|
|
matches that will not expose additional redundancies, since it is
|
|
not unwound as we traverse back up one branch of the dominator
|
|
tree and down another. However, the cost of improving this is
|
|
probably not worth the additional benefits it will bring. */
|
|
|
|
/* Statistics */
|
|
static struct
|
|
{
|
|
int reassociated_by_rank;
|
|
int reassociated_by_match;
|
|
} reassociate_stats;
|
|
|
|
|
|
|
|
/* Seen binary operator hashtable. */
|
|
static htab_t seen_binops;
|
|
|
|
/* Binary operator struct. */
|
|
|
|
typedef struct seen_binop_d
|
|
{
|
|
tree op1;
|
|
tree op2;
|
|
} *seen_binop_t;
|
|
|
|
/* Return a SEEN_BINOP_T if we have seen an associative binary
|
|
operator with OP1 and OP2 in it. */
|
|
|
|
static seen_binop_t
|
|
find_seen_binop (tree op1, tree op2)
|
|
{
|
|
void **slot;
|
|
struct seen_binop_d sbd;
|
|
sbd.op1 = op1;
|
|
sbd.op2 = op2;
|
|
slot = htab_find_slot (seen_binops, &sbd, NO_INSERT);
|
|
if (!slot)
|
|
return NULL;
|
|
return ((seen_binop_t) *slot);
|
|
}
|
|
|
|
/* Insert a binary operator consisting of OP1 and OP2 into the
|
|
SEEN_BINOP table. */
|
|
|
|
static void
|
|
insert_seen_binop (tree op1, tree op2)
|
|
{
|
|
void **slot;
|
|
seen_binop_t new_pair = xmalloc (sizeof (*new_pair));
|
|
new_pair->op1 = op1;
|
|
new_pair->op2 = op2;
|
|
slot = htab_find_slot (seen_binops, new_pair, INSERT);
|
|
if (*slot != NULL)
|
|
free (*slot);
|
|
*slot = new_pair;
|
|
}
|
|
|
|
/* Return the hash value for a seen binop structure pointed to by P.
|
|
Because all the binops we consider are associative, we just add the
|
|
hash value for op1 and op2. */
|
|
|
|
static hashval_t
|
|
seen_binop_hash (const void *p)
|
|
{
|
|
const seen_binop_t sb = (seen_binop_t) p;
|
|
return iterative_hash_expr (sb->op1, 0) + iterative_hash_expr (sb->op2, 0);
|
|
}
|
|
|
|
/* Return true if two seen binop structures pointed to by P1 and P2 are equal.
|
|
We have to check the operators both ways because we don't know what
|
|
order they appear in the table. */
|
|
|
|
static int
|
|
seen_binop_eq (const void *p1, const void *p2)
|
|
{
|
|
const seen_binop_t sb1 = (seen_binop_t) p1;
|
|
const seen_binop_t sb2 = (seen_binop_t) p2;
|
|
return (sb1->op1 == sb2->op1 && sb1->op2 == sb2->op2)
|
|
|| (sb1->op2 == sb2->op1 && sb1->op1 == sb2->op2);
|
|
}
|
|
|
|
/* Value rank structure. */
|
|
|
|
typedef struct valrank_d
|
|
{
|
|
tree e;
|
|
unsigned int rank;
|
|
} *valrank_t;
|
|
|
|
/* Starting rank number for a given basic block, so that we can rank
|
|
operations using unmovable instructions in that BB based on the bb
|
|
depth. */
|
|
static unsigned int *bb_rank;
|
|
|
|
/* Value rank hashtable. */
|
|
static htab_t value_rank;
|
|
|
|
|
|
/* Look up the value rank structure for expression E. */
|
|
|
|
static valrank_t
|
|
find_value_rank (tree e)
|
|
{
|
|
void **slot;
|
|
struct valrank_d vrd;
|
|
vrd.e = e;
|
|
slot = htab_find_slot (value_rank, &vrd, NO_INSERT);
|
|
if (!slot)
|
|
return NULL;
|
|
return ((valrank_t) *slot);
|
|
}
|
|
|
|
/* Insert {E,RANK} into the value rank hashtable. */
|
|
|
|
static void
|
|
insert_value_rank (tree e, unsigned int rank)
|
|
{
|
|
void **slot;
|
|
valrank_t new_pair = xmalloc (sizeof (*new_pair));
|
|
new_pair->e = e;
|
|
new_pair->rank = rank;
|
|
slot = htab_find_slot (value_rank, new_pair, INSERT);
|
|
gcc_assert (*slot == NULL);
|
|
*slot = new_pair;
|
|
|
|
}
|
|
|
|
|
|
/* Return the hash value for a value rank structure */
|
|
|
|
static hashval_t
|
|
valrank_hash (const void *p)
|
|
{
|
|
const valrank_t vr = (valrank_t) p;
|
|
return iterative_hash_expr (vr->e, 0);
|
|
}
|
|
|
|
/* Return true if two value rank structures are equal. */
|
|
|
|
static int
|
|
valrank_eq (const void *p1, const void *p2)
|
|
{
|
|
const valrank_t vr1 = (valrank_t) p1;
|
|
const valrank_t vr2 = (valrank_t) p2;
|
|
return vr1->e == vr2->e;
|
|
}
|
|
|
|
|
|
/* Initialize the reassociation pass. */
|
|
|
|
static void
|
|
init_reassoc (void)
|
|
{
|
|
int i;
|
|
unsigned int rank = 2;
|
|
|
|
tree param;
|
|
int *bbs = xmalloc ((last_basic_block + 1) * sizeof (int));
|
|
|
|
memset (&reassociate_stats, 0, sizeof (reassociate_stats));
|
|
|
|
/* Reverse RPO (Reverse Post Order) will give us something where
|
|
deeper loops come later. */
|
|
flow_reverse_top_sort_order_compute (bbs);
|
|
bb_rank = xcalloc (last_basic_block + 1, sizeof (unsigned int));
|
|
value_rank = htab_create (511, valrank_hash,
|
|
valrank_eq, free);
|
|
seen_binops = htab_create (511, seen_binop_hash,
|
|
seen_binop_eq, free);
|
|
|
|
/* Give each argument a distinct rank. */
|
|
for (param = DECL_ARGUMENTS (current_function_decl);
|
|
param;
|
|
param = TREE_CHAIN (param))
|
|
{
|
|
if (default_def (param) != NULL)
|
|
{
|
|
tree def = default_def (param);
|
|
insert_value_rank (def, ++rank);
|
|
}
|
|
}
|
|
/* Give the chain decl a distinct rank. */
|
|
if (cfun->static_chain_decl != NULL)
|
|
{
|
|
tree def = default_def (cfun->static_chain_decl);
|
|
if (def != NULL)
|
|
insert_value_rank (def, ++rank);
|
|
}
|
|
|
|
/* Set up rank for each BB */
|
|
for (i = 0; i < n_basic_blocks; i++)
|
|
bb_rank[bbs[i]] = ++rank << 16;
|
|
|
|
free (bbs);
|
|
calculate_dominance_info (CDI_DOMINATORS);
|
|
|
|
}
|
|
|
|
/* Cleanup after the reassociation pass, and print stats if
|
|
requested. */
|
|
|
|
static void
|
|
fini_reassoc (void)
|
|
{
|
|
|
|
if (dump_file && (dump_flags & TDF_STATS))
|
|
{
|
|
fprintf (dump_file, "Reassociation stats:\n");
|
|
fprintf (dump_file, "Reassociated by rank: %d\n", reassociate_stats.reassociated_by_rank);
|
|
fprintf (dump_file, "Reassociated by match: %d\n", reassociate_stats.reassociated_by_match);
|
|
}
|
|
htab_delete (value_rank);
|
|
htab_delete (seen_binops);
|
|
free (bb_rank);
|
|
}
|
|
|
|
/* Given an expression E, return the rank of the expression. */
|
|
|
|
static unsigned int
|
|
get_rank (tree e)
|
|
{
|
|
valrank_t vr;
|
|
|
|
/* Constants have rank 0. */
|
|
if (is_gimple_min_invariant (e))
|
|
return 0;
|
|
|
|
/* SSA_NAME's have the rank of the expression they are the result
|
|
of.
|
|
For globals and uninitialized values, the rank is 0.
|
|
For function arguments, use the pre-setup rank.
|
|
For PHI nodes, stores, asm statements, etc, we use the rank of
|
|
the BB.
|
|
For simple operations, the rank is the maximum rank of any of
|
|
its operands, or the bb_rank, whichever is less.
|
|
I make no claims that this is optimal, however, it gives good
|
|
results. */
|
|
|
|
if (TREE_CODE (e) == SSA_NAME)
|
|
{
|
|
tree stmt;
|
|
tree rhs;
|
|
unsigned int rank, maxrank;
|
|
int i;
|
|
|
|
if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
|
|
&& e == default_def (SSA_NAME_VAR (e)))
|
|
return find_value_rank (e)->rank;
|
|
|
|
stmt = SSA_NAME_DEF_STMT (e);
|
|
if (bb_for_stmt (stmt) == NULL)
|
|
return 0;
|
|
|
|
if (TREE_CODE (stmt) != MODIFY_EXPR
|
|
|| !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
|
|
return bb_rank[bb_for_stmt (stmt)->index];
|
|
|
|
/* If we already have a rank for this expression, use that. */
|
|
vr = find_value_rank (e);
|
|
if (vr)
|
|
return vr->rank;
|
|
|
|
/* Otherwise, find the maximum rank for the operands, or the bb
|
|
rank, whichever is less. */
|
|
rank = 0;
|
|
maxrank = bb_rank[bb_for_stmt(stmt)->index];
|
|
rhs = TREE_OPERAND (stmt, 1);
|
|
if (TREE_CODE_LENGTH (TREE_CODE (rhs)) == 0)
|
|
rank = MAX (rank, get_rank (rhs));
|
|
else
|
|
{
|
|
for (i = 0;
|
|
i < TREE_CODE_LENGTH (TREE_CODE (rhs))
|
|
&& TREE_OPERAND (rhs, i)
|
|
&& rank != maxrank; i++)
|
|
rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
|
|
}
|
|
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Rank for ");
|
|
print_generic_expr (dump_file, e, 0);
|
|
fprintf (dump_file, " is %d\n", (rank + 1));
|
|
}
|
|
|
|
/* Note the rank in the hashtable so we don't recompute it. */
|
|
insert_value_rank (e, (rank + 1));
|
|
return (rank + 1);
|
|
}
|
|
|
|
/* Globals, etc, are rank 0 */
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Decide whether we should transpose RHS and some operand of
|
|
LHSDEFOP.
|
|
If yes, then return true and set TAKEOP to the operand number of LHSDEFOP to
|
|
switch RHS for.
|
|
Otherwise, return false. */
|
|
|
|
static bool
|
|
should_transpose (tree rhs ATTRIBUTE_UNUSED,
|
|
unsigned int rhsrank,
|
|
tree lhsdefop, unsigned int *takeop)
|
|
{
|
|
/* Attempt to expose the low ranked
|
|
arguments to CSE if we have something like:
|
|
a = <rank 2> + c (rank 1)
|
|
b = a (rank 3) + d (rank 1)
|
|
We want to transform this into:
|
|
a = c + d
|
|
b = <rank 2> + <rank 3>
|
|
|
|
The op finding part wouldn't be necessary if
|
|
we could swap the operands above and not have
|
|
update_stmt change them back on us.
|
|
*/
|
|
unsigned int lowrankop;
|
|
unsigned int lowrank;
|
|
unsigned int highrank;
|
|
unsigned int highrankop;
|
|
unsigned int temp;
|
|
|
|
lowrankop = 0;
|
|
*takeop = 1;
|
|
lowrank = get_rank (TREE_OPERAND (lhsdefop, 0));
|
|
temp = get_rank (TREE_OPERAND (lhsdefop, 1));
|
|
highrank = temp;
|
|
highrankop = 1;
|
|
if (temp < lowrank)
|
|
{
|
|
lowrankop = 1;
|
|
highrankop = 0;
|
|
*takeop = 0;
|
|
highrank = lowrank;
|
|
lowrank = temp;
|
|
}
|
|
|
|
/* If highrank == lowrank, then we had something
|
|
like:
|
|
a = <rank 1> + <rank 1>
|
|
already, so there is no guarantee that
|
|
swapping our argument in is going to be
|
|
better.
|
|
If we run reassoc twice, we could probably
|
|
have a flag that switches this behavior on,
|
|
so that we try once without it, and once with
|
|
it, so that redundancy elimination sees it
|
|
both ways.
|
|
*/
|
|
|
|
if (lowrank == rhsrank && highrank != lowrank)
|
|
return true;
|
|
|
|
/* Also, see if the LHS's high ranked op should be switched with our
|
|
RHS simply because it is greater in rank than our current RHS. */
|
|
if (TREE_CODE (TREE_OPERAND (lhsdefop, highrankop)) == SSA_NAME)
|
|
{
|
|
tree iop = SSA_NAME_DEF_STMT (TREE_OPERAND (lhsdefop, highrankop));
|
|
if (TREE_CODE (iop) == MODIFY_EXPR)
|
|
iop = TREE_OPERAND (iop, 1);
|
|
if (TREE_CODE (iop) == TREE_CODE (lhsdefop))
|
|
*takeop = 1;
|
|
if (rhsrank < get_rank (TREE_OPERAND (lhsdefop, *takeop)))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Attempt to reassociate the associative binary operator BEXPR, which
|
|
is in the statement pointed to by CURRBSI. Return true if we
|
|
changed the statement. */
|
|
|
|
static bool
|
|
reassociate_expr (tree bexpr, block_stmt_iterator *currbsi)
|
|
{
|
|
tree lhs = TREE_OPERAND (bexpr, 0);
|
|
tree rhs = TREE_OPERAND (bexpr, 1);
|
|
tree lhsdef;
|
|
tree lhsi;
|
|
bool changed = false;
|
|
unsigned int lhsrank = get_rank (lhs);
|
|
unsigned int rhsrank = get_rank (rhs);
|
|
|
|
/* If unsafe math optimizations we can do reassociation for non-integral
|
|
types. */
|
|
if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
|| !INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
|
|
&& (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (rhs))
|
|
|| !SCALAR_FLOAT_TYPE_P (TREE_TYPE(lhs))
|
|
|| !flag_unsafe_math_optimizations))
|
|
return false;
|
|
|
|
/* We want the greater ranked operand to be our "LHS" for simplicity
|
|
sake. There is no point in actually modifying the expression, as
|
|
update_stmt will simply resort the operands anyway. */
|
|
if (lhsrank < rhsrank)
|
|
{
|
|
tree temp;
|
|
unsigned int temp1;
|
|
temp = lhs;
|
|
lhs = rhs;
|
|
rhs = temp;
|
|
temp1 = lhsrank;
|
|
lhsrank = rhsrank;
|
|
rhsrank = temp1;
|
|
}
|
|
|
|
/* If the high ranked operand is an SSA_NAME, and the binary
|
|
operator is not something we've already seen somewhere else
|
|
(i.e., it may be redundant), attempt to reassociate it.
|
|
|
|
We can't reassociate expressions unless the expression we are
|
|
going to reassociate with is only used in our current expression,
|
|
or else we may screw up other computations, like so:
|
|
|
|
a = b + c
|
|
e = a + d
|
|
|
|
g = a + f
|
|
|
|
We cannot reassociate and rewrite the "a = ..." ,
|
|
because that would change the value of the computation of
|
|
"g = a + f". */
|
|
if (TREE_CODE (lhs) == SSA_NAME && !find_seen_binop (lhs, rhs))
|
|
{
|
|
lhsdef = SSA_NAME_DEF_STMT (lhs);
|
|
if (TREE_CODE (lhsdef) == MODIFY_EXPR)
|
|
{
|
|
lhsi = TREE_OPERAND (lhsdef, 1);
|
|
if (TREE_CODE (lhsi) == TREE_CODE (bexpr))
|
|
{
|
|
use_operand_p use;
|
|
tree usestmt;
|
|
if (single_imm_use (lhs, &use, &usestmt))
|
|
{
|
|
unsigned int takeop = 0;
|
|
unsigned int otherop = 1;
|
|
bool foundmatch = false;
|
|
bool foundrank = false;
|
|
|
|
/* If we can easily transpose this into an operation
|
|
we've already seen, let's do that.
|
|
otherwise, let's try to expose low ranked ops to
|
|
CSE. */
|
|
if (find_seen_binop (TREE_OPERAND (lhsi, 1), rhs))
|
|
{
|
|
takeop = 0;
|
|
otherop = 1;
|
|
foundmatch = true;
|
|
}
|
|
else if (find_seen_binop (TREE_OPERAND (lhsi, 0),
|
|
rhs))
|
|
{
|
|
takeop = 1;
|
|
otherop = 0;
|
|
foundmatch = true;
|
|
}
|
|
else if (should_transpose (rhs, rhsrank, lhsi,
|
|
&takeop))
|
|
{
|
|
foundrank = true;
|
|
}
|
|
if (foundmatch || foundrank)
|
|
{
|
|
block_stmt_iterator lhsbsi = bsi_for_stmt (lhsdef);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "Reassociating by %s\n",
|
|
foundmatch ? "match" : "rank");
|
|
fprintf (dump_file, "Before LHS:");
|
|
print_generic_stmt (dump_file, lhsi, 0);
|
|
fprintf (dump_file, "Before curr expr:");
|
|
print_generic_stmt (dump_file, bexpr, 0);
|
|
}
|
|
TREE_OPERAND (bexpr, 0) = TREE_OPERAND (lhsi, takeop);
|
|
TREE_OPERAND (lhsi, takeop) = rhs;
|
|
TREE_OPERAND (bexpr, 1) = TREE_OPERAND (lhsdef, 0);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
fprintf (dump_file, "After LHS:");
|
|
print_generic_stmt (dump_file, lhsi, 0);
|
|
fprintf (dump_file, "After curr expr:");
|
|
print_generic_stmt (dump_file, bexpr, 0);
|
|
}
|
|
bsi_move_before (&lhsbsi, currbsi);
|
|
update_stmt (lhsdef);
|
|
update_stmt (bsi_stmt (*currbsi));
|
|
lhsbsi = bsi_for_stmt (lhsdef);
|
|
update_stmt (bsi_stmt (lhsbsi));
|
|
|
|
/* If update_stmt didn't reorder our operands,
|
|
we'd like to recurse on the expression we
|
|
just reassociated and reassociate it
|
|
top-down, exposing further opportunities.
|
|
Unfortunately, update_stmt does reorder them,
|
|
so we can't do this cheaply. */
|
|
if (!foundmatch)
|
|
reassociate_stats.reassociated_by_rank++;
|
|
else
|
|
reassociate_stats.reassociated_by_match++;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return changed;
|
|
}
|
|
|
|
/* Reassociate expressions in basic block BB and its dominator as
|
|
children , return true if any
|
|
expressions changed. */
|
|
|
|
static bool
|
|
reassociate_bb (basic_block bb)
|
|
{
|
|
bool changed = false;
|
|
block_stmt_iterator bsi;
|
|
basic_block son;
|
|
|
|
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
|
|
{
|
|
tree stmt = bsi_stmt (bsi);
|
|
|
|
if (TREE_CODE (stmt) == MODIFY_EXPR)
|
|
{
|
|
tree rhs = TREE_OPERAND (stmt, 1);
|
|
if (associative_tree_code (TREE_CODE (rhs)))
|
|
{
|
|
if (reassociate_expr (rhs, &bsi))
|
|
{
|
|
changed = true;
|
|
update_stmt (stmt);
|
|
}
|
|
insert_seen_binop (TREE_OPERAND (rhs, 0),
|
|
TREE_OPERAND (rhs, 1));
|
|
}
|
|
}
|
|
}
|
|
for (son = first_dom_son (CDI_DOMINATORS, bb);
|
|
son;
|
|
son = next_dom_son (CDI_DOMINATORS, son))
|
|
{
|
|
changed |= reassociate_bb (son);
|
|
}
|
|
return changed;
|
|
}
|
|
|
|
|
|
static bool
|
|
do_reassoc (void)
|
|
{
|
|
bool changed = false;
|
|
|
|
changed = reassociate_bb (ENTRY_BLOCK_PTR);
|
|
|
|
return changed;
|
|
}
|
|
|
|
|
|
/* Gate and execute functions for Reassociation. */
|
|
|
|
static void
|
|
execute_reassoc (void)
|
|
{
|
|
init_reassoc ();
|
|
do_reassoc ();
|
|
fini_reassoc ();
|
|
}
|
|
|
|
struct tree_opt_pass pass_reassoc =
|
|
{
|
|
"reassoc", /* name */
|
|
NULL, /* gate */
|
|
execute_reassoc, /* execute */
|
|
NULL, /* sub */
|
|
NULL, /* next */
|
|
0, /* static_pass_number */
|
|
TV_TREE_REASSOC, /* tv_id */
|
|
PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */
|
|
0, /* properties_provided */
|
|
0, /* properties_destroyed */
|
|
0, /* todo_flags_start */
|
|
TODO_update_ssa | TODO_dump_func
|
|
| TODO_ggc_collect | TODO_verify_ssa, /* todo_flags_finish */
|
|
0 /* letter */
|
|
};
|