forked from cseagle/blc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
condexe.hh
237 lines (228 loc) · 11.9 KB
/
condexe.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file condexe.hh
/// \brief Classes for simplifying control-flow with shared conditional expressions
#ifndef __CONDEXE__
#define __CONDEXE__
#include "funcdata.hh"
/// \brief A helper class for describing the similarity of the boolean condition between 2 CBRANCH operations
///
/// This class determines if two CBRANCHs share the same condition. It also determines if the conditions
/// are complements of each other, and/or they are shared along only one path.
///
/// The expression computing the root boolean value for one CBRANCH is marked out
/// by setupInitOp(). For the other CBRANCH, findMatch() tries to find common Varnode
/// in its boolean expression and then maps a critical path from the Varnode to the final boolean.
/// Assuming the common Varnode exists, the method finalJudgement() decides if the two boolean values
/// are the same, uncorrelated, or complements of one another.
class ConditionMarker {
PcodeOp *initop; ///< The root CBRANCH operation to compare against
Varnode *basevn; ///< The boolean Varnode on which the root CBRANCH keys
Varnode *boolvn; ///< If \b basevn is defined by BOOL_NEGATE, this is the unnegated Varnode
Varnode *bool2vn; ///< If the first param to \b binaryop is defined by BOOL_NEGATE, this is the unnegated Varnode
Varnode *bool3vn; ///< If the second param to \b binaryop is defined by BOOL_NEGATE, this is the unnegated Varnode
PcodeOp *binaryop; ///< The binary operator producing the root boolean (if non-null)
bool matchflip; ///< True if the compared CBRANCH keys on the opposite boolean value of the root
int4 state; ///< Depth of critical path
PcodeOp *opstate[2]; ///< p-code operations along the critical path
bool flipstate[2]; ///< Boolean negation along the critical path
int4 slotstate[2]; ///< Input Varnode to follow to stay on critical path
bool multion; ///< True if MULTIEQUAL used in condition
bool binon; ///< True if a binary operator is used in condition
int4 multislot; ///< Input slot of MULTIEQUAL on critical path, -1 if no MULTIEQUAL
void setupInitOp(PcodeOp *op); ///< Map out the root boolean expression
Varnode *findMatch(PcodeOp *op); ///< Find a matching Varnode in the root expression producing the given CBRANCH boolean
bool sameOpComplement(PcodeOp *bin1op, PcodeOp *bin2op);
bool andOrComplement(PcodeOp *bin1op, PcodeOp *bin2op);
bool finalJudgement(Varnode *vn);
public:
ConditionMarker(void); ///< Constructor
~ConditionMarker(void); ///< Destructor
bool verifyCondition(PcodeOp *op, PcodeOp *initop); ///< Perform the correlation test on two CBRANCH operations
int4 getMultiSlot(void) const { return multislot; } ///< Get the MULTIEQUAL slot in the critical path
bool getFlip(void) const { return matchflip; } ///< Return \b true is the expressions are anti-correlated
static bool varnodeSame(Varnode *a,Varnode *b);
static bool varnodeComplement(Varnode *a,Varnode *b);
};
/// \brief A class for simplifying a series of conditionally executed statements.
///
/// This class tries to perform transformations like the following:
/// \code
/// if (a) { if (a) {
/// BODY1
/// } ==> BODY1
/// if (a) { BODY2
/// BODY2
/// } }
/// \endcode
/// Other similar configurations where two CBRANCHs are based on
/// the same condition are handled. The main variation, referred to as a
/// \b directsplit in the code looks like:
/// \code
/// if (a) { if (a && new_boolean()) {
/// a = new_boolean();
/// } ==> BODY1
/// if (a) {
/// BODY1
/// } }
/// \endcode
/// The value of 'a' doesn't need to be reevaluated if it is false.
///
/// In the first scenario, there is a block where two flows come
/// together but don't need to, as the evaluation of the boolean
/// is redundant. This block is the \b iblock. The original
/// evaluation of the boolean occurs in \b initblock. There are
/// two paths from here to \b iblock, called the \b prea path and \b preb path,
/// either of which may contain additional 1in/1out blocks.
/// There are also two paths out of \b iblock, \b posta, and \b postb.
/// The ConditionalExecution class determines if the CBRANCH in
/// \b iblock is redundant by determining if the boolean value is
/// either the same as, or the complement of, the boolean value
/// in \b initblock. If the CBRANCH is redundant, \b iblock is
/// removed, linking \b prea to \b posta and \b preb to \b postb (or vice versa
/// depending on whether the booleans are complements of each other).
/// If \b iblock is to be removed, modifications to data-flow made
/// by \b iblock must be preserved. For MULTIEQUALs in \b iblock,
/// reads are examined to see if they came from the \b posta path,
/// or the \b postb path, then the are replaced by the MULTIEQUAL
/// slot corresponding to the matching \b prea or \b preb branch. If
/// \b posta and \b postb merge at an \b exitblock, the MULTIEQUAL must
/// be pushed into the \b exitblock and reads which can't be
/// attributed to the \b posta or \b postb path are replaced by the
/// \b exitblock MULTIEQUAL.
///
/// In theory, other operations performed in \b iblock could be
/// pushed into \b exitblock if they are not read in the \b posta
/// or \b postb paths, but currently
/// non MULTIEQUAL operations in \b iblock terminate the action.
///
/// In the second scenario, the boolean evaluated in \b initblock
/// remains unmodified along only one of the two paths out, \b prea
/// or \b reb. The boolean in \b iblock (modulo complementing) will
/// evaluate in the same way. We define \b posta as the path out of
/// \b iblock that will be followed by this unmodified path. The
/// transform that needs to be made is to have the unmodified path
/// out of \b initblock flow immediately into the \b posta path without
/// having to reevalute the condition in \b iblock. \b iblock is not
/// removed because flow from the "modified" path may also flow
/// into \b posta, depending on how the boolean was modified.
/// Adjustments to data-flow are similar to the first scenario but
/// slightly more complicated. The first block along the \b posta
/// path is referred to as the \b posta_block, this block will
/// have a new block flowing into it.
class ConditionalExecution {
Funcdata *fd; ///< Function being analyzed
PcodeOp *cbranch; ///< CBRANCH in iblock
BlockBasic *initblock; ///< The initial block computing the boolean value
BlockBasic *iblock; ///< The block where flow is (unnecessarily) coming together
int4 prea_inslot; ///< iblock->In(prea_inslot) = pre a path
bool init2a_true; ///< Does \b true branch (in terms of iblock) go to path pre a
bool iblock2posta_true; ///< Does \b true branch go to path post a
int4 camethruposta_slot; ///< init or pre slot to use, for data-flow thru post
int4 posta_outslot; ///< The \b out edge from iblock to posta
BlockBasic *posta_block; ///< First block in posta path
BlockBasic *postb_block; ///< First block in postb path
bool directsplit; ///< True if this the \e direct \e split variation
map<int4,Varnode *> replacement; ///< Map from block to replacement Varnode for (current) Varnode
vector<PcodeOp *> returnop; ///< RETURN ops that have flow coming out of the iblock
vector<bool> heritageyes; ///< Boolean array indexed by address space indicating whether the space is heritaged
void buildHeritageArray(void);
bool testIBlock(void);
bool findInitPre(void); ///< Find \b initblock, based on \b iblock
bool verifySameCondition(void); ///< Verify that \b initblock and \b iblock branch on the same condition
bool testOpRead(Varnode *vn,PcodeOp *op); ///< Can we move the (non MULTIEQUAL) defining p-code of the given Varnode
bool testMultiRead(Varnode *vn,PcodeOp *op); ///< Can we mave the MULTIEQUAL defining p-code of the given Varnode
bool testRemovability(PcodeOp *op); ///< Test if the given PcodeOp can be removed from \b iblock
void predefineDirectMulti(PcodeOp *op);
void adjustDirectMulti(void); ///< Update inputs to any MULTIEQUAL in the direct block
Varnode *getNewMulti(PcodeOp *op,BlockBasic *bl);
Varnode *getReplacementRead(PcodeOp *op,BlockBasic *bl);
void doReplacement(PcodeOp *op); ///< Replace the data-flow for the given PcodeOp in \b iblock
void fixReturnOp(void);
bool verify(void); ///< Verify that we have a removable \b iblock
public:
ConditionalExecution(Funcdata *f); ///< Constructor
bool trial(BlockBasic *ib); ///< Test for a modifiable configuration around the given block
void execute(void); ///< Eliminate the unnecessary path join at \b iblock
};
/// \brief Search for and remove various forms of redundant CBRANCH operations
///
/// This action wraps the analysis performed by ConditionalExecution to simplify control-flow
/// that repeatedly branches on the same (or slightly modified) boolean expression.
class ActionConditionalExe : public Action {
public:
ActionConditionalExe(const string &g) : Action(0,"conditionalexe",g) {} ///< Constructor
virtual Action *clone(const ActionGroupList &grouplist) const {
if (!grouplist.contains(getGroup())) return (Action *)0;
return new ActionConditionalExe(getGroup());
}
virtual int4 apply(Funcdata &data);
};
/// \brief Simplify predication constructions involving the INT_OR operator
///
/// In this form of predication, two variables are set based on a condition and then ORed together.
/// Both variables may be set to zero, or to some other value, based on the condition
/// and the zero values are such that at least one of the variables is zero.
/// \code
/// tmp1 = cond ? val1 : 0;
/// tmp2 = cond ? 0 : val2;
/// result = tmp1 | tmp2;
/// \endcode
/// The RuleOrPredicate simplifies this to
/// \code
/// if (cond) result = val1; else result = val2;
/// \endcode
/// or to be precise
/// \code
/// newtmp = val1 ? val2; // Using a new MULTIEQUAL
/// result = newtmp;
/// \endcode
/// In an alternate form we have
/// \code
/// tmp1 = (val2 == 0) ? val1 : 0
/// result = tmp1 | val2;
/// \endcode
/// again, one of val1 or val2 must be zero, so this gets replaced with
/// \code
/// tmp1 = val1 ? val2
/// result = tmp1
/// \endcode
class RuleOrPredicate : public Rule {
/// \brief A helper class to mark up predicated INT_OR expressions
struct MultiPredicate {
PcodeOp *op; ///< Base MULTIEQUAL op
int4 zeroSlot; ///< Input slot containing path that sets zero
const FlowBlock *zeroBlock; ///< Final block in path that sets zero
const FlowBlock *condBlock; ///< Conditional block determining if zero is set or not
PcodeOp *cbranch; ///< CBRANCH determining if zero is set
Varnode *otherVn; ///< Other (non-zero) Varnode getting set on other path
bool zeroPathIsTrue; ///< True if path to zero set is the \b true path out of condBlock
bool discoverZeroSlot(Varnode *vn);
bool discoverCbranch(void);
void discoverPathIsTrue(void);
bool discoverConditionalZero(Varnode *vn);
};
int4 checkSingle(Varnode *vn,MultiPredicate &branch,PcodeOp *op,Funcdata &data);
public:
RuleOrPredicate(const string &g) : Rule(g, 0, "orpredicate") {} ///< Constructor
virtual Rule *clone(const ActionGroupList &grouplist) const {
if (!grouplist.contains(getGroup())) return (Rule *)0;
return new RuleOrPredicate(getGroup());
}
virtual void getOpList(vector<uint4> &oplist) const;
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
#endif