diff --git a/console/build.sbt b/console/build.sbt index 0b47db9aadb8..9587509bcc2a 100644 --- a/console/build.sbt +++ b/console/build.sbt @@ -12,6 +12,7 @@ dependsOn( Projects.macros, Projects.javasrc2cpg, Projects.jssrc2cpg, + Projects.php2cpg, Projects.pysrc2cpg, Projects.rubysrc2cpg, Projects.x2cpg % "compile->compile;test->test" diff --git a/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala b/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala index 5452a4050437..aaf18fd0c297 100644 --- a/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala +++ b/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala @@ -1,15 +1,21 @@ package io.joern.console.cpgcreation import io.joern.console.FrontendConfig +import io.joern.php2cpg.{Config, Frontend, Php2Cpg} +import io.joern.x2cpg.X2Cpg +import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig +import io.shiftleft.codepropertygraph.Cpg import java.nio.file.Path import scala.util.Try case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGenerator { - private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg") + private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg") + private var phpConfig: Option[Config] = None override def generate(inputPath: String, outputPath: String): Try[String] = { val arguments = List(inputPath) ++ Seq("-o", outputPath) ++ config.cmdLineParams + phpConfig = X2Cpg.parseCommandLine(arguments.toArray, Frontend.cmdLineParser, Config()) runShellCommand(command.toString, arguments).map(_ => outputPath) } @@ -17,4 +23,9 @@ case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGe command.toFile.exists override def isJvmBased = true + + override def applyPostProcessingPasses(cpg: Cpg): Cpg = { + Php2Cpg.postProcessingPasses(cpg, phpConfig).foreach(_.createAndApply()) + cpg + } 
} diff --git a/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt b/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt new file mode 100644 index 000000000000..a0c9aad5e141 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt @@ -0,0 +1,56 @@ +// function name; r1, r2; p1_t1, p1_t2; p2_t1; ... +add_post_meta; int, bool; int; string; mixed; bool +apply_filters; mixed; string; mixed; mixed +array_map; array; callable, null; array; array; array +array_merge; array; array; array; array +array_walk_recursive; bool; array, object; callable; mixed +base64_decode; string; string; bool +base64_encode; string; string +count; int; array, countable; int +current; mixed; array, object +do_action; ; string; mixed; +echo; void; string +empty; bool; mixed +explode; array; string; string; int +floatval; float; mixed +in_array; bool; mixed; array; bool +intval; int; mixed +is_array; bool; mixed +is_bool; bool; mixed +is_double; bool; mixed +is_float; bool; mixed +is_int; bool; mixed +is_integer; bool; mixed +is_iterable; bool; mixed +is_long; bool; mixed +is_null; bool; mixed +is_numeric; bool; mixed +is_object; bool; mixed +is_real; bool; mixed +is_resource; bool; mixed +is_scalar; bool; mixed +is_string; bool; mixed +isset; bool; mixed; array; bool +list; array; mixed; mixed; mixed; mixed +maybe_unserialize; mixed; string +number_format; string; float; int; string, null; string, null +preg_match; int, bool; string; string; array; int; int +preg_match_all; int, bool; string; string; array; int; int +preg_replace; string, array, null; string, array; string, array; string, array; int; int +printf; int; string; mixed; mixed; mixed; mixed +rawurldecode; string; string +rtrim; string; string; string +selected; string; mixed; mixed; bool +serialize; string; mixed +sort; bool; array; int +sprintf; string; string; mixed +strip_tags; string; string; array, string, null +strpos; int, bool; string; 
string; int +strtolower; string; string +strtotime; int, bool; string; int, null +substr; string; string; int; int, null +trim; string; string; string +unserialize; mixed; string; array +urldecode; string; string +var_dump; ; mixed; mixed +wp_json_encode; string,bool; mixed; int; int \ No newline at end of file diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala index 4f1bbd1f216c..9461ba1fcc8e 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala @@ -1,13 +1,15 @@ package io.joern.php2cpg import io.joern.x2cpg.{X2CpgConfig, X2CpgMain} +import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery} import io.joern.php2cpg.Frontend._ import scopt.OParser /** Command line configuration parameters */ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[String] = None) - extends X2CpgConfig[Config] { + extends X2CpgConfig[Config] + with TypeRecoveryParserConfig[Config] { def withPhpIni(phpIni: String): Config = { copy(phpIni = Some(phpIni)).withInheritedFields(this) } @@ -17,7 +19,7 @@ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[Stri } } -private object Frontend { +object Frontend { implicit val defaultConfig: Config = Config() @@ -31,7 +33,8 @@ private object Frontend { .text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."), opt[String]("php-parser-bin") .action((x, c) => c.withPhpParserBin(x)) - .text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern.") + .text("path to php-parser.phar binary. 
Defaults to php-parser shipped with Joern."), + XTypeRecovery.parserOptions ) } } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala index 6ffaf4362a5f..8363a2b9b4ce 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala @@ -1,12 +1,21 @@ package io.joern.php2cpg import io.joern.php2cpg.parser.PhpParser -import io.joern.php2cpg.passes.{AnyTypePass, AstCreationPass, AstParentInfoPass, ClosureRefPass, LocalCreationPass} +import io.joern.php2cpg.passes.{ + AnyTypePass, + AstCreationPass, + AstParentInfoPass, + ClosureRefPass, + LocalCreationPass, + PhpSetKnownTypesPass, + PhpTypeRecoveryPass +} import io.joern.x2cpg.X2Cpg.withNewEmptyCpg import io.joern.x2cpg.X2CpgFrontend -import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass} +import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig} import io.joern.x2cpg.utils.ExternalCommand import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.CpgPassBase import io.shiftleft.codepropertygraph.generated.Languages import org.slf4j.LoggerFactory @@ -68,3 +77,13 @@ class Php2Cpg extends X2CpgFrontend[Config] { } } + +object Php2Cpg { + + def postProcessingPasses(cpg: Cpg, config: Option[Config] = None): List[CpgPassBase] = { + val typeRecoveryConfig = config + .map(c => XTypeRecoveryConfig(c.typePropagationIterations, !c.disableDummyTypes)) + .getOrElse(XTypeRecoveryConfig(iterations = 3)) + List(new PhpSetKnownTypesPass(cpg), new PhpTypeRecoveryPass(cpg, typeRecoveryConfig)) + } +} diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala new file mode 100644 index 000000000000..6eda8df54f9c --- /dev/null +++ 
b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala @@ -0,0 +1,76 @@ +package io.joern.php2cpg.passes + +import better.files.File +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.ForkJoinParallelCpgPass +import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.codepropertygraph.generated.PropertyNames +import io.shiftleft.codepropertygraph.generated.Operators +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes +import org.slf4j.{Logger, LoggerFactory} +import overflowdb.BatchedUpdate + +import scala.io.Source +import java.io.{File => JFile} + +// Corresponds to a parsed row in the known functions file +case class KnownFunction( + name: String, + // return types. A function has at most one return value, but with one or more types. + rTypes: Seq[String] = Seq.empty, + // Index 0 = parameter at P0. A function has potentially multiple parameters, each with one or more types. + pTypes: Seq[Seq[String]] = Seq.empty +) + +/** Sets the return and parameter types for builtin functions with known function signatures. + * + * TODO: Need to handle variadic arguments. 
+  */
+class PhpSetKnownTypesPass(cpg: Cpg, knownTypesFile: Option[JFile] = None)
+    extends ForkJoinParallelCpgPass[KnownFunction](cpg) {
+
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  /** Reads the signature table and produces one [[KnownFunction]] per usable row.
+    *
+    * The table is read from `knownTypesFile` when given, otherwise from the bundled
+    * `known_function_signatures.txt` resource. Blank rows and rows starting with `//` are skipped.
+    *
+    * @return
+    *   the parsed rows; each one is later handed to [[runOnPart]]
+    */
+  override def generateParts(): Array[KnownFunction] = {
+    val source = knownTypesFile match {
+      case Some(file) => Source.fromFile(file)
+      case _          => Source.fromResource("known_function_signatures.txt")
+    }
+    // Materialise the rows before closing, and close even if reading or parsing a row throws.
+    try {
+      source
+        .getLines()
+        .map(_.strip)
+        .filterNot(row => row.isEmpty || row.startsWith("//"))
+        .flatMap(row => createKnownFunctionFromLine(row))
+        .toArray
+    } finally {
+      source.close()
+    }
+  }
+
+  /** Applies one signature row to every method whose full name equals `part.name`.
+    *
+    * @param builder
+    *   diff graph that collects the property updates
+    * @param part
+    *   the parsed signature row
+    */
+  override def runOnPart(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, part: KnownFunction): Unit = {
+    // A slot for which the row names no type must leave the node untouched; passing an empty or
+    // blank entry on to setTypes would write an empty type name / empty hint list instead.
+    def setKnownTypes(node: StoredNode, types: Seq[String]): Unit = {
+      val known = types.filter(_.nonEmpty)
+      if (known.nonEmpty) setTypes(builder, node, known)
+    }
+
+    cpg.method.fullNameExact(part.name).l.foreach { method =>
+      setKnownTypes(method.methodReturn, part.rTypes)
+      // Parameters are paired with the row's parameter columns by position; zip drops any surplus
+      // entries on either side.
+      method.parameter.l.zip(part.pTypes).foreach { case (param, paramTypes) =>
+        setKnownTypes(param, paramTypes)
+      }
+    }
+  }
+
+  /** Parses one row of the form `name; returnTypes; p0Types; p1Types; ...`.
+    *
+    * Fields are separated by `;` and stripped of surrounding whitespace. Note that `String.split` drops
+    * trailing empty fields, so a trailing `;` does not add a parameter.
+    *
+    * @param line
+    *   a single row of the signature table
+    * @return
+    *   the parsed function, or `None` when the row carries no function name
+    */
+  def createKnownFunctionFromLine(line: String): Option[KnownFunction] = {
+    line.split(";").map(_.strip).toList match {
+      case Nil       => None
+      case "" :: _   => None // blank row or missing name: nothing to look up
+      case name :: Nil => Some(KnownFunction(name))
+      case name :: rTypes :: pTypes =>
+        // pTypes is Nil for a row with only a name and return types, which yields no parameter types.
+        Some(KnownFunction(name, scanReturnTypes(rTypes), scanParamTypes(pTypes)))
+    }
+  }
+
+  /* From comma separated list of types, create list of types. Blank entries are dropped, so an
+   * empty return column yields an empty list rather than a single empty type name. */
+  def scanReturnTypes(rTypesRaw: String): Seq[String] =
+    rTypesRaw.split(",").map(_.strip).filter(_.nonEmpty).toSeq
+
+  /* From a semicolon separated list of parameters, each with a comma separated list of types,
+   * create a list of lists of types.
*/ + def scanParamTypes(pTypesRawArr: List[String]): Seq[Seq[String]] = + pTypesRawArr.map(paramTypeRaw => paramTypeRaw.split(",").map(_.strip).toSeq).toSeq + + protected def setTypes(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, n: StoredNode, types: Seq[String]): Unit = + if (types.size == 1) builder.setNodeProperty(n, PropertyNames.TYPE_FULL_NAME, types.head) + else builder.setNodeProperty(n, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, types) +} diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala new file mode 100644 index 000000000000..9e6bd11cd869 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala @@ -0,0 +1,285 @@ +package io.joern.php2cpg.passes + +import io.joern.x2cpg.Defines +import io.joern.x2cpg.passes.frontend._ +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames, DispatchTypes} +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.{Assignment, FieldAccess} +import overflowdb.BatchedUpdate.DiffGraphBuilder + +import scala.annotation.tailrec +import scala.collection.mutable + +class PhpTypeRecoveryPass(cpg: Cpg, config: XTypeRecoveryConfig = XTypeRecoveryConfig(iterations = 3)) + extends XTypeRecoveryPass[NamespaceBlock](cpg, config) { + + override protected def generateRecoveryPass(state: XTypeRecoveryState): XTypeRecovery[NamespaceBlock] = + new PhpTypeRecovery(cpg, state) +} + +private class PhpTypeRecovery(cpg: Cpg, state: XTypeRecoveryState) extends XTypeRecovery[NamespaceBlock](cpg, state) { + + override def compilationUnit: Iterator[NamespaceBlock] = cpg.file.namespaceBlock.iterator + + override def 
generateRecoveryForCompilationUnitTask( + unit: NamespaceBlock, + builder: DiffGraphBuilder + ): RecoverForXCompilationUnit[NamespaceBlock] = { + val newConfig = state.config.copy(enabledDummyTypes = state.isFinalIteration && state.config.enabledDummyTypes) + new RecoverForPhpFile(cpg, unit, builder, state.copy(config = newConfig)) + } +} + +private class RecoverForPhpFile(cpg: Cpg, cu: NamespaceBlock, builder: DiffGraphBuilder, state: XTypeRecoveryState) + extends RecoverForXCompilationUnit[NamespaceBlock](cpg, cu, builder, state) { + override protected def prepopulateSymbolTableEntry(x: AstNode): Unit = x match { + case x: Call => + x.methodFullName match { + case Operators.alloc => + case _ => symbolTable.append(x, (x.methodFullName +: x.dynamicTypeHintFullName).toSet) + } + case _ => super.prepopulateSymbolTableEntry(x) + } + + protected val methodTypesTable = mutable.Map[Method, mutable.HashSet[String]]() + + override def isConstructor(c: Call): Boolean = + isConstructor(c.name) && c.code.endsWith(")") + + override protected def isConstructor(name: String): Boolean = + !name.isBlank && name.charAt(0).isUpper + + override def assignments: Iterator[Assignment] = + cu.ast.isCall.nameExact(Operators.assignment).map(new OpNodes.Assignment(_)) + + protected def unresolvedDynamicCalls: Iterator[Call] = cu.ast.isCall + .filter(_.dispatchType == DispatchTypes.DYNAMIC_DISPATCH) + .filter(_.methodFullName.startsWith(Defines.UnresolvedNamespace)) + + /* Register post-processing pass that executes in the super class */ + override protected def postSetTypeInformation(): Unit = { + unresolvedDynamicCalls.foreach(visitUnresolvedDynamicCall) + } + override protected def visitIdentifierAssignedToConstructor(i: Identifier, c: Call): Set[String] = { + val constructorPaths = symbolTable.get(c).map(_.stripSuffix(s"${pathSep}")) + associateTypes(i, constructorPaths) + } + + override protected def visitIdentifierAssignedToCallRetVal(i: Identifier, c: Call): Set[String] = { + + if 
(symbolTable.contains(c)) { + val callReturns = methodReturnValues(symbolTable.get(c).toSeq) + associateTypes(i, callReturns) + } else if (c.argument.exists(_.argumentIndex == 0)) { + val callFullNames = (c.argument(0) match { + case i: Identifier if symbolTable.contains(LocalVar(i.name)) => symbolTable.get(LocalVar(i.name)) + case i: Identifier if symbolTable.contains(CallAlias(i.name)) => symbolTable.get(CallAlias(i.name)) + case _ => Set.empty + }).map(_.concat(s"$pathSep${c.name}")).toSeq + val callReturns = methodReturnValues(callFullNames) + associateTypes(i, callReturns) + } else { + /* CPG may already contain type info for this method (globally, outside of compilation) + * unit. If not, use dummy return value. + */ + val rs = methodReturnValues(Seq(c.methodFullName)) + if (rs.isEmpty) associateTypes(i, Set(s"${c.name}$pathSep${XTypeRecovery.DummyReturnType}")) + else associateTypes(i, rs) + } + } + + override protected def visitReturns(ret: Return): Unit = { + /* A bug in XTypeRecovery mishandles functions that have multiple return + * statements. We add a new "symbol table" (methodTypesTable) for method + * return types as they get collected across the multiple return statements + * for a single function. 
+ */ + val m = ret.method + val existingTypes = mutable.HashSet.from( + (m.methodReturn.typeFullName +: m.methodReturn.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .filterNot(_.startsWith(Defines.UnresolvedNamespace)) + ) + existingTypes.addAll(methodTypesTable.getOrElse(m, mutable.HashSet())) + + @tailrec + def extractTypes(xs: List[CfgNode]): Set[String] = xs match { + case ::(head: Literal, Nil) if head.typeFullName != "ANY" => + Set(head.typeFullName) + case ::(head: Call, Nil) if head.name == Operators.fieldAccess => + val fieldAccess = new FieldAccess(head) + val (sym, ts) = getSymbolFromCall(fieldAccess) + val cpgTypes = cpg.typeDecl + .fullNameExact(ts.map(_.compUnitFullName).toSeq: _*) + .member + .nameExact(sym.identifier) + .flatMap(m => m.typeFullName +: m.dynamicTypeHintFullName) + .filterNot { x => x == "ANY" || x == "this" } + .toSet + if (cpgTypes.nonEmpty) cpgTypes + else symbolTable.get(sym) + case ::(head: Call, Nil) if symbolTable.contains(head) => + val callPaths = symbolTable.get(head) + val returnValues = methodReturnValues(callPaths.toSeq) + if (returnValues.isEmpty) + callPaths.map(c => s"$c$pathSep${XTypeRecovery.DummyReturnType}") + else + returnValues + case ::(head: Call, Nil) if head.argumentOut.headOption.exists(symbolTable.contains) => + symbolTable + .get(head.argumentOut.head) + .map(t => Seq(t, head.name, XTypeRecovery.DummyReturnType).mkString(pathSep.toString)) + case ::(identifier: Identifier, Nil) if symbolTable.contains(identifier) => + symbolTable.get(identifier) + case ::(head: Call, Nil) => + extractTypes(head.argument.l) + case _ => Set.empty + } + val returnTypes = extractTypes(ret.argumentOut.l) + existingTypes.addAll(returnTypes) + + /* Check whether method return is already known, and if so, remove dummy value */ + val saveTypes = existingTypes.filterNot(typeName => { + if (typeName.startsWith(Defines.UnresolvedNamespace)) + true + else if (typeName.endsWith(s"${XTypeRecovery.DummyReturnType}")) + 
typeName.split(pathSep).headOption match { + case Some(methodName) => { + val methodReturns = methodReturnValues(Seq(methodName)) + .filterNot(_.endsWith(s"${XTypeRecovery.DummyReturnType}")) + !methodReturns.isEmpty + } + case None => false + } + else + false + }) + methodTypesTable.update(m, saveTypes) + builder.setNodeProperty(ret.method.methodReturn, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, saveTypes) + } + + /* Necessary to change the filter regex from (this|self) to (\\$this|this), in order to account for $this PHP + * convention. + */ + override protected def associateTypes(symbol: LocalVar, fa: FieldAccess, types: Set[String]): Set[String] = { + fa.astChildren.filterNot(_.code.matches("(\\$this|this|self)")).headOption.collect { + case fi: FieldIdentifier => + getFieldParents(fa).foreach(t => persistMemberWithTypeDecl(t, fi.canonicalName, types)) + case i: Identifier if isField(i) => + getFieldParents(fa).foreach(t => persistMemberWithTypeDecl(t, i.name, types)) + } + symbolTable.append(symbol, types) + } + + /* Reference the PythonTypeRecovery implementation. The XTypeRecovery one seems incorrect. 
*/
+  override protected def getFieldParents(fa: FieldAccess): Set[String] = {
+    // NOTE(review): the empty-string literal below may be an extraction artifact (a stripped
+    // "<...>"-style method name); it is reproduced as found — confirm against the upstream file.
+    if (fa.method.name == "") {
+      Set(fa.method.fullName)
+    } else if (fa.method.typeDecl.nonEmpty) {
+      // Fields may live on the enclosing type or on any type it inherits from.
+      val parentTypes       = fa.method.typeDecl.fullName.toSet
+      val baseTypeFullNames = cpg.typeDecl.fullNameExact(parentTypes.toSeq: _*).inheritsFromTypeFullName.toSet
+      (parentTypes ++ baseTypeFullNames).filterNot(_.matches("(?i)(any|object)"))
+    } else {
+      super.getFieldParents(fa)
+    }
+  }
+
+  /* Resolves the types of a call: field accesses go through the symbol table by field name, calls
+   * already in the symbol table use their recorded types, index accesses are delegated, and anything
+   * else falls back to the return values known for the call's method full name.
+   */
+  override protected def getTypesFromCall(c: Call): Set[String] = c.name match {
+    case Operators.fieldAccess        => symbolTable.get(LocalVar(getFieldName(new FieldAccess(c))))
+    case _ if symbolTable.contains(c) => symbolTable.get(c)
+    case Operators.indexAccess        => getIndexAccessTypes(c)
+    case _                            => methodReturnValues(Seq(c.methodFullName))
+  }
+
+  /* Builds the collection-variable key for an index access from its two arguments (collection and
+   * index). Returns None, after a debug log entry, for argument shapes that are not handled.
+   */
+  override protected def indexAccessToCollectionVar(c: Call): Option[CollectionVar] = {
+    // Name used for a call appearing as collection or index: the field name for field accesses,
+    // "collection[idx]" for nested index accesses, and the plain call name otherwise.
+    def callName(x: Call): String =
+      if (x.name == Operators.fieldAccess)
+        getFieldName(new FieldAccess(x))
+      else if (x.name == Operators.indexAccess)
+        indexAccessToCollectionVar(x)
+          .map(cv => s"${cv.identifier}[${cv.idx}]")
+          .getOrElse(XTypeRecovery.DummyIndexAccess)
+      else x.name
+
+    c.argumentOut.l match {
+      case List(i: Identifier, idx: Literal)    => Some(CollectionVar(i.name, idx.code))
+      case List(i: Identifier, idx: Identifier) => Some(CollectionVar(i.name, idx.code))
+      case List(base: Call, idx: Call)          => Some(CollectionVar(callName(base), callName(idx)))
+      case List(base: Call, idx: Literal)       => Some(CollectionVar(callName(base), idx.code))
+      case List(base: Call, idx: Identifier)    => Some(CollectionVar(callName(base), idx.code))
+      case xs =>
+        logger.debug(s"Unhandled index access ${xs.map(x => (x.label, x.code)).mkString(",")} @ ${c.name}")
+        None
+    }
+  }
+  override protected def assignTypesToCall(x: Call, types: Set[String]): Set[String] = {
+    if (types.nonEmpty) {
+      getSymbolFromCall(x) match {
+        case (lhs, globalKeys) if globalKeys.nonEmpty => {
+          globalKeys.foreach { (fieldVar:
FieldPath) => + persistMemberWithTypeDecl(fieldVar.compUnitFullName, fieldVar.identifier, types) + } + symbolTable.append(lhs, types) + } + case (lhs, _) => symbolTable.append(lhs, types) + } + } else Set.empty + } + + override protected def methodReturnValues(methodFullNames: Seq[String]): Set[String] = { + /* Look up methods in existing CPG */ + val rs = cpg.method + .fullNameExact(methodFullNames: _*) + .methodReturn + .flatMap(mr => mr.typeFullName +: mr.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .filterNot(_.endsWith("alloc.")) + .filterNot(_.endsWith(s"${XTypeRecovery.DummyReturnType}")) + .toSet + if (rs.isEmpty) + /* Return dummy return type if not found */ + methodFullNames + .flatMap(m => Set(m.concat(s"$pathSep${XTypeRecovery.DummyReturnType}"))) + .toSet + else rs + } + + /* If we know the type of the method's first parameter, use that to determine the method scope. + * + * TODO: Are there methods / instances where this doesn't work? Static methods? + * TODO: What if the first parameter could take multiple types? + * TODO: Test on nested dynamic calls, e.g. 
foo->bar->baz() + */ + protected def visitUnresolvedDynamicCall(c: Call): Unit = { + + if (c.argument.exists(_.argumentIndex == 0)) { + c.argument(0) match { + case p: Identifier => { + val ts = (p.typeFullName +: p.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .distinct + ts match { + case Seq() => + case Seq(t) => { + val newFullName = t + "->" + c.name + builder.setNodeProperty(c, PropertyNames.METHOD_FULL_NAME, newFullName) + builder.setNodeProperty( + c, + PropertyNames.TYPE_FULL_NAME, + s"${newFullName}$pathSep${XTypeRecovery.DummyReturnType}" + ) + builder.setNodeProperty(c, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, Seq.empty) + } + case _ => { /* TODO: case where multiple possible types are identified */ } + } + } + case _ => + } + } + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala new file mode 100644 index 000000000000..68866c288f36 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala @@ -0,0 +1,425 @@ +package io.joern.php2cpg.passes + +import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture +import io.shiftleft.semanticcpg.language._ + +class PhpTypeRecoveryPassTests extends PhpCode2CpgFixture() { + + /* TODO: Future tests to specify correct type recovery behaviors: + * - Method call inherited from a super class should be recovered + * - A type hint on a parameter should be sufficient to resolve method full names at calls + * - Parameter types on builtins with variadic parameters + */ + + "literals declared from built-in types" should { + lazy val cpg = code(""" + |attrA; + | } + |} + |function foo_instantiate_classA() { + | $a = new ClassA(); + | return $a; + |} + |""".stripMargin).cpg + + "recover type of class member assigned to literal" in { + val List(attrA) = 
cpg.typeDecl("ClassA").member.name("attrA").take(1).l + attrA.typeFullName shouldBe "int" + } + + "recover type of method that returns class member" in { + val List(barMethod) = cpg.typeDecl("ClassA").method.name("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + + "recover type of object that instantiates a class" in { + val List(aObject) = cpg.identifier("a").take(1).l + aObject.typeFullName shouldBe "ClassA" + } + + "recover type of function that returns object" in { + val List(fooMethod) = cpg.method("foo_instantiate_classA").take(1).l + fooMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("ClassA") + } + } + + "functions that return multiple objects" should { + lazy val cpg = code(""" + |foo; + | } + |} + |class ClassB { + | private $foo = 0; + | + | function baz() { + | return $this->foo; + | } + |} + | + |function foo_return_different_objects($type_param) { + | if ($type_param == 0) { + | $a = new ClassA(); + | } else { + | $a = new ClassB(); + | } + | return $a; + |} + |""".stripMargin).cpg + + "recover both possible types for local variable" in { + val List(aIdentifier) = cpg.identifier("a").take(1).l + aIdentifier.dynamicTypeHintFullName shouldBe Seq("ClassA", "ClassB") + } + + "recover both possible types for function return" in { + val List(fooMethod) = cpg.method("foo_return_different_objects").take(1).l + fooMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("ClassA", "ClassB") + } + } + + /* Joern's PHP front-end does not currently handle comments. This test is + * ignored, but should be revisited when comments are handled. 
+ */ + "functions with docblock type information" should { + lazy val cpg = code(""" + |foo = 1; + | } + | + | function get_foo() { + | return $this->foo; + | } + |} + |""".stripMargin).cpg + + "identify class member type from setter" in { + val List(fooMember) = cpg.typeDecl("ClassA").member.name("foo").take(1).l + fooMember.typeFullName shouldBe "int" + } + + "identify getter return type from class member" in { + val List(getterMethod) = cpg.method("get_foo").take(1).l + getterMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } + + "functions with multiple return statements with two different types" should { + lazy val cpg = code(""" + |.indexAccess").take(1).l + indexAccessCall.typeFullName shouldBe "int" + } + } + + "function declarations with type hints" should { + lazy val cpg = code(""" + |foo(); + | } + |} + | + |function baz() { + | $a = new ClassA(); + | return $a->foo(); + |} + """.stripMargin).cpg + + "be properly resolved when called with $this" in { + val List(fooCall) = cpg.method("bar").ast.isCall.take(1).l + fooCall.methodFullName shouldBe "ClassA->foo" + } + + "be properly resolved when called through class with known type" in { + val List(fooCall) = cpg.method("baz").ast.isCall.filter(_.code == "$a->foo()").take(1).l + fooCall.methodFullName shouldBe "ClassA->foo" + } + + "propagate type information to calling method" in { + val List(bazMethod) = cpg.method("baz").take(1).l + bazMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + + "propagate type information to calling method when called with $this" in { + val List(barMethod) = cpg.method("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } + + "modules that import modules" should { + lazy val cpg = code( + """ + |foo(); + |} + |""".stripMargin, + "useA.php" + ) + + "recover the type of object instantiated from imported module class" in { + val List(aIdentifier) = cpg.identifier("a").take(1).l + 
aIdentifier.typeFullName shouldBe "ClassA" + } + + "recover method return value assigned from class method" in { + val List(barMethod) = cpg.method("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala index 2bbbb3714d17..15a9c7f3ee50 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala @@ -3,6 +3,7 @@ package io.joern.php2cpg.testfixtures import io.joern.dataflowengineoss.queryengine.EngineContext import io.joern.php2cpg.{Config, Php2Cpg} import io.joern.x2cpg.testfixtures.{Code2CpgFixture, DefaultTestCpg, LanguageFrontend} +import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve} @@ -12,6 +13,7 @@ import io.joern.x2cpg.X2Cpg import io.shiftleft.semanticcpg.layers.LayerCreatorContext import io.joern.dataflowengineoss.layers.dataflows.OssDataFlowOptions import io.joern.dataflowengineoss.layers.dataflows.OssDataFlow +import io.joern.php2cpg.passes.PhpSetKnownTypesPass trait PhpFrontend extends LanguageFrontend { override val fileSuffix: String = ".php" @@ -31,8 +33,8 @@ class PhpTestCpg(runOssDataflow: Boolean) extends TestCpg with PhpFrontend { val options = new OssDataFlowOptions() new OssDataFlow(options).run(context) } + Php2Cpg.postProcessingPasses(this).foreach(_.createAndApply()) } - } class PhpCode2CpgFixture(runOssDataflow: Boolean = false)