Skip to content

Commit

Permalink
Merge pull request #2084 from jpstotz/FillArrayData
Browse files Browse the repository at this point in the history
Fix array type detection on fill-array-data DEX instructions - fixes #1806
  • Loading branch information
StevenArzt authored May 27, 2024
2 parents 249f856 + c5165e0 commit 944263d
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 94 deletions.
2 changes: 1 addition & 1 deletion src/main/java/soot/dexpler/DexBody.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

import org.jf.dexlib2.analysis.ClassPath;
import org.jf.dexlib2.analysis.ClassPathResolver;
Expand Down Expand Up @@ -771,6 +770,7 @@ public Body jimplify(Body b, SootMethod m) {
DeadAssignmentEliminator.v().transform(jBody);
UnconditionalBranchFolder.v().transform(jBody);
}
DexFillArrayDataTransformer.v().transform(jBody);

TypeAssigner.v().transform(jBody);

Expand Down
166 changes: 166 additions & 0 deletions src/main/java/soot/dexpler/DexFillArrayDataTransformer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package soot.dexpler;

/*-
* #%L
* Soot - a J*va Optimization Framework
* %%
* Copyright (C) 2012 Michael Markert, Frank Hartmann
*
* (c) 2012 University of Luxembourg - Interdisciplinary Centre for
* Security Reliability and Trust (SnT) - All rights reserved
* Alexandre Bartel
*
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import soot.ArrayType;
import soot.Body;
import soot.BodyTransformer;
import soot.G;
import soot.Local;
import soot.Type;
import soot.Unit;
import soot.Value;
import soot.dexpler.instructions.FillArrayDataInstruction;
import soot.dexpler.typing.UntypedConstant;
import soot.jimple.ArrayRef;
import soot.jimple.AssignStmt;
import soot.jimple.InvokeExpr;
import soot.jimple.NewArrayExpr;
import soot.toolkits.graph.ExceptionalUnitGraph;
import soot.toolkits.graph.ExceptionalUnitGraphFactory;
import soot.toolkits.scalar.LocalDefs;

/**
* If Dalvik bytecode can contain <code>fill-array-data</code> instructions that can fill an array with data elements we only
* know the element size of.
*
* Therefore when processing such instructions in {@link FillArrayDataInstruction} we don't know the exact type of the data
* that is loaded. Because of (conditional) branches in the code, identifying the type is not always possible at that stage.
* Instead {@link UntypedConstant} constants are used. These constants are processed by this transformer and get their final
* type.
*
*
* @author Jan Peter Stotz
*
*/
public class DexFillArrayDataTransformer extends BodyTransformer {
private static final Logger logger = LoggerFactory.getLogger(DexFillArrayDataTransformer.class);

private static final int MAX_RECURSION_DEPTH = 5;

public static DexFillArrayDataTransformer v() {
return new DexFillArrayDataTransformer();
}

protected void internalTransform(final Body body, String phaseName, Map<String, String> options) {
final ExceptionalUnitGraph g = ExceptionalUnitGraphFactory.createExceptionalUnitGraph(body, DalvikThrowAnalysis.v());
final LocalDefs defs = G.v().soot_toolkits_scalar_LocalDefsFactory().newLocalDefs(g);

for (Iterator<Unit> unitIt = body.getUnits().snapshotIterator(); unitIt.hasNext();) {
Unit u = unitIt.next();
if (!(u instanceof AssignStmt)) {
continue;
}
AssignStmt ass = (AssignStmt) u;
Value rightOp = ass.getRightOp();
if (rightOp instanceof UntypedConstant) {
Value left = ass.getLeftOp();
if (left instanceof ArrayRef) {
ArrayRef leftArray = (ArrayRef) left;

Local l = (Local) leftArray.getBase();
List<Type> arrayTypes = new LinkedList<>();
checkArrayDefinitions(l, ass, defs, arrayTypes, MAX_RECURSION_DEPTH);
if (arrayTypes.isEmpty()) {
throw new InternalError("Failed to determine the array type ");
}
if (arrayTypes.size() > 1) {
arrayTypes = arrayTypes.stream().distinct().collect(Collectors.toList());
if (arrayTypes.size() > 1) {
logger.warn("Found multiple possible array types, using first ignoreing the others: {}", arrayTypes);
}
}

// We found the array type, now convert the untyped constant value to it's final type
Type elementType = arrayTypes.get(0);
Value constant = ass.getRightOp();
UntypedConstant untyped = (UntypedConstant) constant;
ass.setRightOp(untyped.defineType(elementType));
}
}
}
}

/**
* Check the all available definitions of the current array to detect the array type and thus the type of the data loaded
* by the array-fill-data instruction.
*
* @param l
* local the array we are interested in is saved in
* @param u
* unit we start our search
* @param defs
* @param arrayTypes
* result list containing the discovered array type(s)
* @param maxDepth
*/
private void checkArrayDefinitions(Local l, Unit u, LocalDefs defs, List<Type> arrayTypes, int maxDepth) {
if (maxDepth <= 0) {
// Avoid infinite recursion
logger.warn("Recursion depth limit reached - aborting");
return;
}
List<Unit> assDefs = defs.getDefsOfAt(l, u);
for (Unit d : assDefs) {
if (d instanceof AssignStmt) {
AssignStmt arrayAssign = (AssignStmt) d;
Value source = arrayAssign.getRightOp();
if (source instanceof NewArrayExpr) {
// array is assigned from a newly created array
NewArrayExpr newArray = (NewArrayExpr) source;
arrayTypes.add(newArray.getBaseType());
} else if (source instanceof InvokeExpr) {
// array is assigned from the return value of a function
InvokeExpr invExpr = (InvokeExpr) source;
Type aType = invExpr.getMethodRef().getReturnType();
if (!(aType instanceof ArrayType)) {
throw new InternalError("Failed to identify the array type. The identified method invocation "
+ "does not return an array type. Invocation: " + invExpr.getMethodRef());
}
arrayTypes.add(((ArrayType) aType).getArrayElementType());
} else if (source instanceof Local) {
// our array is defined by an assignment from another array => check the definition of that other array.
Local newLocal = (Local) source; // local of the "other array"
checkArrayDefinitions(newLocal, d, defs, arrayTypes, maxDepth - 1);
} else {
throw new InternalError("Unsupported array definition statement: " + d);
}
}
}

}
}
119 changes: 26 additions & 93 deletions src/main/java/soot/dexpler/instructions/FillArrayDataInstruction.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,41 +27,35 @@
* #L%
*/

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.jf.dexlib2.iface.instruction.Instruction;
import org.jf.dexlib2.iface.instruction.formats.ArrayPayload;
import org.jf.dexlib2.iface.instruction.formats.Instruction22c;
import org.jf.dexlib2.iface.instruction.formats.Instruction31t;
import org.jf.dexlib2.iface.reference.TypeReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import soot.ArrayType;
import soot.BooleanType;
import soot.ByteType;
import soot.CharType;
import soot.DoubleType;
import soot.FloatType;
import soot.IntType;
import soot.Local;
import soot.LongType;
import soot.ShortType;
import soot.Type;
import soot.dexpler.DexBody;
import soot.dexpler.DexType;
import soot.dexpler.DexFillArrayDataTransformer;
import soot.dexpler.typing.UntypedConstant;
import soot.dexpler.typing.UntypedIntOrFloatConstant;
import soot.dexpler.typing.UntypedLongOrDoubleConstant;
import soot.jimple.ArrayRef;
import soot.jimple.AssignStmt;
import soot.jimple.DoubleConstant;
import soot.jimple.FloatConstant;
import soot.jimple.Constant;
import soot.jimple.IntConstant;
import soot.jimple.Jimple;
import soot.jimple.LongConstant;
import soot.jimple.NumericConstant;
import soot.jimple.Stmt;

/**
* Converts <code>fill-array-data</code> instructions and associated data blocks into a series of assignment instructions
* (one for each array index the data block contains a value).
*
* As the data block contains untyped data, only the number of bytes per element is known. Recovering the array type at the
* stage this class is used on would require a detailed analysis on the dex code. Therefore we save the data elements as
* {@link UntypedConstant} and later use {@link DexFillArrayDataTransformer} to convert the values to their final type.
*/
public class FillArrayDataInstruction extends PseudoInstruction {
private static final Logger logger = LoggerFactory.getLogger(FillArrayDataInstruction.class);

Expand Down Expand Up @@ -95,13 +89,11 @@ public void jimplify(DexBody body) {
List<Number> elements = arrayTable.getArrayElements();
int numElements = elements.size();

int elementsWidth = arrayTable.getElementWidth();
Stmt firstAssign = null;
for (int i = 0; i < numElements; i++) {
ArrayRef arrayRef = Jimple.v().newArrayRef(arrayReference, IntConstant.v(i));
NumericConstant element = getArrayElement(elements.get(i), body, destRegister);
if (element == null) {
break;
}
Constant element = getArrayElement(elements.get(i), elementsWidth);
AssignStmt assign = Jimple.v().newAssignStmt(arrayRef, element);
addTags(assign);
body.add(assign);
Expand All @@ -110,6 +102,8 @@ public void jimplify(DexBody body) {
}
}
if (firstAssign == null) { // if numElements == 0. Is it possible?
logger.warn("No assign statements created for array at address 0x{} - empty array data section?",
Integer.toHexString(targetAddress));
firstAssign = Jimple.v().newNopStmt();
body.add(firstAssign);
}
Expand All @@ -122,80 +116,19 @@ public void jimplify(DexBody body) {

}

private NumericConstant getArrayElement(Number element, DexBody body, int arrayRegister) {

List<DexlibAbstractInstruction> instructions = body.instructionsBefore(this);
Set<Integer> usedRegisters = new HashSet<Integer>();
usedRegisters.add(arrayRegister);

Type elementType = null;
Outer: for (DexlibAbstractInstruction i : instructions) {
if (usedRegisters.isEmpty()) {
break;
}

for (int reg : usedRegisters) {
if (i instanceof NewArrayInstruction) {
NewArrayInstruction newArrayInstruction = (NewArrayInstruction) i;
Instruction22c instruction22c = (Instruction22c) newArrayInstruction.instruction;
if (instruction22c.getRegisterA() == reg) {
ArrayType arrayType = (ArrayType) DexType.toSoot((TypeReference) instruction22c.getReference());
elementType = arrayType.getElementType();
break Outer;
}
}
}

// // look for obsolete registers
// for (int reg : usedRegisters) {
// if (i.overridesRegister(reg)) {
// usedRegisters.remove(reg);
// break; // there can't be more than one obsolete
// }
// }

// look for new registers
for (int reg : usedRegisters) {
int newRegister = i.movesToRegister(reg);
if (newRegister != -1) {
usedRegisters.add(newRegister);
usedRegisters.remove(reg);
break; // there can't be more than one new
}
}
private Constant getArrayElement(Number element, int elementsWidth) {
if (elementsWidth == 2) {
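// For size = 2 the possible array types are short[] and char[]
// NOTE(review): shortValue() sign-extends; confirm this is the intended representation for char[] data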
return IntConstant.v(element.shortValue());
}

if (elementType == null) {
// throw new InternalError("Unable to find array type to type array elements!");
logger.warn("Unable to find array type to type array elements! Array was not defined! (obfuscated bytecode?)");
return null;
}

NumericConstant value;

if (elementType instanceof BooleanType) {
value = IntConstant.v(element.intValue());
IntConstant ic = (IntConstant) value;
if (ic.value != 0) {
value = IntConstant.v(1);
}
} else if (elementType instanceof ByteType) {
value = IntConstant.v(element.byteValue());
} else if (elementType instanceof CharType || elementType instanceof ShortType) {
value = IntConstant.v(element.shortValue());
} else if (elementType instanceof DoubleType) {
value = DoubleConstant.v(Double.longBitsToDouble(element.longValue()));
} else if (elementType instanceof FloatType) {
value = FloatConstant.v(Float.intBitsToFloat(element.intValue()));
} else if (elementType instanceof IntType) {
value = IntConstant.v(element.intValue());
} else if (elementType instanceof LongType) {
value = LongConstant.v(element.longValue());
} else {
throw new RuntimeException("Invalid Array Type occured in FillArrayDataInstruction: " + elementType);
if (elementsWidth <= 4) {
// width 1 or 4 (width 2 is handled above): can be array of byte, boolean, int or float
return UntypedIntOrFloatConstant.v(element.intValue());
}
return value;

// can be array of long or double
return UntypedLongOrDoubleConstant.v(element.longValue());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,22 @@ public IntConstant toIntConstant() {
return IntConstant.v(value);
}

/**
 * Converts this untyped constant into the integer representation of a boolean.
 *
 * @return the int constant 1 if the raw value is non-zero, otherwise the int constant 0
 */
public IntConstant toBooleanConstant() {
  // every non-zero raw value collapses to 1; zero stays zero
  return IntConstant.v(value == 0 ? 0 : 1);
}

@Override
public Value defineType(Type t) {
if (t instanceof FloatType) {
return this.toFloatConstant();
} else if (t instanceof IntType || t instanceof CharType || t instanceof BooleanType || t instanceof ByteType
|| t instanceof ShortType) {
return this.toIntConstant();
} else if (t instanceof BooleanType) {
return toBooleanConstant();
} else {
if (value == 0 && t instanceof RefLikeType) {
return NullConstant.v();
Expand Down

0 comments on commit 944263d

Please sign in to comment.