From 66089b8680151460e73fdf6fc9e2f96907ab6029 Mon Sep 17 00:00:00 2001 From: andreas Date: Wed, 15 Nov 2023 14:22:52 -0500 Subject: [PATCH] [php2cpg] Php type recovery (#3723) * Create PHP Type Recovery pass * Remove erroneous types from symbolTable * Implement builtins type recovery lookup * Update PhpTypeRecovery pass to look up in CPG * Remove overrides of methods that are meant to be private * Run scalafmt * Create initial test file for PhpTypeRecoveryPass * Add PhpTypeRecoveryPassTests * Add PhpSetKnownTypes pass for builtin PHP functions Pass will set the types for builtin functions with known function signatures. This pass will run before the PhpTypeRecovery pass. Currently, the pass does not handle variadic parameters. * Minor update to tests for SetKnownTypes pass * Fix formatting with scalafmt * WIP: Refactor PhpSetKnownTypes pass * WIP: Refactor PhpSetKnownTypes pass to be cleaner Currently the method return types are set correctly, but the parameter types still need to be updated. * Refactor PhpSetKnownTypesPass as a ForkJoinParallelCpgPass * Ignore tests that depend on context sensitivity and comments * Ignore more context sensitive tests * Remove multi-line debug statements * Remove builtins table from PhpTypeRecoveryPass * Fix how multiple return statements are handled This bug comes from the XTypeRecovery pass, but is fixed in PhpTypeRecovery by overriding the visitReturns method. If this approach (using another "symbol table" for methods) is sufficient, it should also be fixed in the abstract parent class.
* Fix typo in PhpTypeRecoveryPassTests * Filter out dummy return values when saving types * Refactor the dummy type removal to be less coarse * Resolve unknown namespaces with new type info * Ignore array field type recovery tests for now * Run scalafmt * Fix MatchError in resolving dynamic calls * Implement Php2Cpg.postProcessingPasses for deduplication This defines the list of default PHP postProcessingPasses in one location, so that they can all be applied without code duplication between the different places where passes are applied, like PhpCpgGenerator and PhpCode2CpgFixture. Additionally, the XTypeRecoveryConfig options are now exposed to the frontend command line arguments for PHP analysis as a result of this refactor. * Refactor PhpSetKnownTypes to use idiomatic Scala matches * Replace hardcoded string with defined value * Remove excessive debug statements and format comments * Run test:scalafmt * Address minor comments - Changed string equality comparisons from equals() to == - Changed "<module>" to "<global>" as top-level method namespace - Removed errant debug statement --- console/build.sbt | 1 + .../console/cpgcreation/PhpCpgGenerator.scala | 13 +- .../resources/known_function_signatures.txt | 56 +++ .../main/scala/io/joern/php2cpg/Main.scala | 9 +- .../main/scala/io/joern/php2cpg/Php2Cpg.scala | 23 +- .../php2cpg/passes/PhpSetKnownTypes.scala | 76 ++++ .../php2cpg/passes/PhpTypeRecovery.scala | 285 ++++++++++++ .../passes/PhpTypeRecoveryPassTests.scala | 425 ++++++++++++++++++ .../testfixtures/PhpCode2CpgFixture.scala | 4 +- 9 files changed, 885 insertions(+), 7 deletions(-) create mode 100644 joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt create mode 100644 joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala create mode 100644 joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala create mode 100644 
joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala diff --git a/console/build.sbt b/console/build.sbt index 0b47db9aadb8..9587509bcc2a 100644 --- a/console/build.sbt +++ b/console/build.sbt @@ -12,6 +12,7 @@ dependsOn( Projects.macros, Projects.javasrc2cpg, Projects.jssrc2cpg, + Projects.php2cpg, Projects.pysrc2cpg, Projects.rubysrc2cpg, Projects.x2cpg % "compile->compile;test->test" diff --git a/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala b/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala index 5452a4050437..aaf18fd0c297 100644 --- a/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala +++ b/console/src/main/scala/io/joern/console/cpgcreation/PhpCpgGenerator.scala @@ -1,15 +1,21 @@ package io.joern.console.cpgcreation import io.joern.console.FrontendConfig +import io.joern.php2cpg.{Config, Frontend, Php2Cpg} +import io.joern.x2cpg.X2Cpg +import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig +import io.shiftleft.codepropertygraph.Cpg import java.nio.file.Path import scala.util.Try case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGenerator { - private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg") + private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg") + private var phpConfig: Option[Config] = None override def generate(inputPath: String, outputPath: String): Try[String] = { val arguments = List(inputPath) ++ Seq("-o", outputPath) ++ config.cmdLineParams + phpConfig = X2Cpg.parseCommandLine(arguments.toArray, Frontend.cmdLineParser, Config()) runShellCommand(command.toString, arguments).map(_ => outputPath) } @@ -17,4 +23,9 @@ case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGe command.toFile.exists override def isJvmBased = true + + override def applyPostProcessingPasses(cpg: 
Cpg): Cpg = { + Php2Cpg.postProcessingPasses(cpg, phpConfig).foreach(_.createAndApply()) + cpg + } } diff --git a/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt b/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt new file mode 100644 index 000000000000..a0c9aad5e141 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/resources/known_function_signatures.txt @@ -0,0 +1,56 @@ +// function name; r1, r2; p1_t1, p1_t2; p2_t1; ... +add_post_meta; int, bool; int; string; mixed; bool +apply_filters; mixed; string; mixed; mixed +array_map; array; callable, null; array; array; array +array_merge; array; array; array; array +array_walk_recursive; bool; array, object; callable; mixed +base64_decode; string; string; bool +base64_encode; string; string +count; int; array, countable; int +current; mixed; array, object +do_action; ; string; mixed; +echo; void; string +empty; bool; mixed +explode; array; string; string; int +floatval; float; mixed +in_array; bool; mixed; array; bool +intval; int; mixed +is_array; bool; mixed +is_bool; bool; mixed +is_double; bool; mixed +is_float; bool; mixed +is_int; bool; mixed +is_integer; bool; mixed +is_iterable; bool; mixed +is_long; bool; mixed +is_null; bool; mixed +is_numeric; bool; mixed +is_object; bool; mixed +is_real; bool; mixed +is_resource; bool; mixed +is_scalar; bool; mixed +is_string; bool; mixed +isset; bool; mixed; array; bool +list; array; mixed; mixed; mixed; mixed +maybe_unserialize; mixed; string +number_format; string; float; int; string, null; string, null +preg_match; int, bool; string; string; array; int; int +preg_match_all; int, bool; string; string; array; int; int +preg_replace; string, array, null; string, array; string, array; string, array; int; int +printf; int; string; mixed; mixed; mixed; mixed +rawurldecode; string; string +rtrim; string; string; string +selected; string; mixed; mixed; bool +serialize; string; mixed +sort; bool; array; int +sprintf; 
string; string; mixed +strip_tags; string; string; array, string, null +strpos; int, bool; string; string; int +strtolower; string; string +strtotime; int, bool; string; int, null +substr; string; string; int; int, null +trim; string; string; string +unserialize; mixed; string; array +urldecode; string; string +var_dump; ; mixed; mixed +wp_json_encode; string,bool; mixed; int; int \ No newline at end of file diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala index 4f1bbd1f216c..9461ba1fcc8e 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Main.scala @@ -1,13 +1,15 @@ package io.joern.php2cpg import io.joern.x2cpg.{X2CpgConfig, X2CpgMain} +import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery} import io.joern.php2cpg.Frontend._ import scopt.OParser /** Command line configuration parameters */ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[String] = None) - extends X2CpgConfig[Config] { + extends X2CpgConfig[Config] + with TypeRecoveryParserConfig[Config] { def withPhpIni(phpIni: String): Config = { copy(phpIni = Some(phpIni)).withInheritedFields(this) } @@ -17,7 +19,7 @@ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[Stri } } -private object Frontend { +object Frontend { implicit val defaultConfig: Config = Config() @@ -31,7 +33,8 @@ private object Frontend { .text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."), opt[String]("php-parser-bin") .action((x, c) => c.withPhpParserBin(x)) - .text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern.") + .text("path to php-parser.phar binary. 
Defaults to php-parser shipped with Joern."), + XTypeRecovery.parserOptions ) } } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala index 6ffaf4362a5f..8363a2b9b4ce 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala @@ -1,12 +1,21 @@ package io.joern.php2cpg import io.joern.php2cpg.parser.PhpParser -import io.joern.php2cpg.passes.{AnyTypePass, AstCreationPass, AstParentInfoPass, ClosureRefPass, LocalCreationPass} +import io.joern.php2cpg.passes.{ + AnyTypePass, + AstCreationPass, + AstParentInfoPass, + ClosureRefPass, + LocalCreationPass, + PhpSetKnownTypesPass, + PhpTypeRecoveryPass +} import io.joern.x2cpg.X2Cpg.withNewEmptyCpg import io.joern.x2cpg.X2CpgFrontend -import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass} +import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig} import io.joern.x2cpg.utils.ExternalCommand import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.CpgPassBase import io.shiftleft.codepropertygraph.generated.Languages import org.slf4j.LoggerFactory @@ -68,3 +77,13 @@ class Php2Cpg extends X2CpgFrontend[Config] { } } + +object Php2Cpg { + + def postProcessingPasses(cpg: Cpg, config: Option[Config] = None): List[CpgPassBase] = { + val typeRecoveryConfig = config + .map(c => XTypeRecoveryConfig(c.typePropagationIterations, !c.disableDummyTypes)) + .getOrElse(XTypeRecoveryConfig(iterations = 3)) + List(new PhpSetKnownTypesPass(cpg), new PhpTypeRecoveryPass(cpg, typeRecoveryConfig)) + } +} diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala new file mode 100644 index 000000000000..6eda8df54f9c --- /dev/null +++ 
b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpSetKnownTypes.scala @@ -0,0 +1,76 @@ +package io.joern.php2cpg.passes + +import better.files.File +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.ForkJoinParallelCpgPass +import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.codepropertygraph.generated.PropertyNames +import io.shiftleft.codepropertygraph.generated.Operators +import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes +import org.slf4j.{Logger, LoggerFactory} +import overflowdb.BatchedUpdate + +import scala.io.Source +import java.io.{File => JFile} + +// Corresponds to a parsed row in the known functions file +case class KnownFunction( + name: String, + // return types. A function has at most one return value, but with one or more types. + rTypes: Seq[String] = Seq.empty, + // Index 0 = parameter at P0. A function has potentially multiple parameters, each with one or more types. + pTypes: Seq[Seq[String]] = Seq.empty +) + +/** Sets the return and parameter types for builtin functions with known function signatures. + * + * TODO: Need to handle variadic arguments. 
+  */
+class PhpSetKnownTypesPass(cpg: Cpg, knownTypesFile: Option[JFile] = None)
+    extends ForkJoinParallelCpgPass[KnownFunction](cpg) {
+
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  /** Parses the signature table into one [[KnownFunction]] per row.
+    *
+    * Rows are read from `knownTypesFile` when it is given, otherwise from the bundled
+    * `known_function_signatures.txt` resource. Blank rows and rows starting with `//` are skipped.
+    */
+  override def generateParts(): Array[KnownFunction] = {
+    val source = knownTypesFile match {
+      case Some(file) => Source.fromFile(file)
+      case None       => Source.fromResource("known_function_signatures.txt")
+    }
+    // getLines() is lazy, so the rows must be materialised (toArray) before the source is closed.
+    // try/finally guarantees the handle is released even when a row fails to parse.
+    try {
+      source
+        .getLines()
+        .map(_.strip)
+        .filterNot(line => line.isEmpty || line.startsWith("//"))
+        .flatMap(line => createKnownFunctionFromLine(line))
+        .toArray
+    } finally {
+      source.close()
+    }
+  }
+
+  /** Writes the return and parameter types of `part` onto every method whose full name equals `part.name`.
+    *
+    * Parameters are paired with the signature's parameter types in the order both sequences are produced;
+    * `zip` silently ignores surplus entries on either side.
+    */
+  override def runOnPart(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, part: KnownFunction): Unit = {
+    cpg.method.fullNameExact(part.name).l.foreach { method =>
+      setTypes(builder, method.methodReturn, part.rTypes)
+      // Pure side effect: iterate with foreach instead of building a throwaway list with map.
+      method.parameter.l.zip(part.pTypes).foreach { case (param, paramTypes) =>
+        setTypes(builder, param, paramTypes)
+      }
+    }
+  }
+
+  /** Parses one `name; returnTypes; param1Types; param2Types; ...` row.
+    *
+    * @return
+    *   `None` when the row carries no function name, otherwise the parsed signature.
+    */
+  def createKnownFunctionFromLine(line: String): Option[KnownFunction] = {
+    line.split(";").map(_.strip).toList match {
+      case Nil                       => None
+      case name :: _ if name.isEmpty => None
+      case name :: Nil               => Some(KnownFunction(name))
+      case name :: rTypes :: Nil     => Some(KnownFunction(name, scanReturnTypes(rTypes)))
+      case name :: rTypes :: pTypes  => Some(KnownFunction(name, scanReturnTypes(rTypes), scanParamTypes(pTypes)))
+    }
+  }
+
+  /* From comma separated list of types, create list of types. Empty entries are dropped, so a row that
+   * leaves the return column blank (e.g. `do_action; ; string`) yields no types instead of one empty name.
+   */
+  def scanReturnTypes(rTypesRaw: String): Seq[String] =
+    rTypesRaw.split(",").map(_.strip).filter(_.nonEmpty).toSeq
+
+  /* From a semicolon separated list of parameters, each with a comma separated list of types,
+   * create a list of lists of types.
+   */
+  def scanParamTypes(pTypesRawArr: List[String]): Seq[Seq[String]] =
+    // Keep one (possibly empty) entry per parameter so positions stay aligned; only blank type names are dropped.
+    pTypesRawArr.map(paramTypeRaw => paramTypeRaw.split(",").map(_.strip).filter(_.nonEmpty).toSeq).toSeq
+
+  /** Records `types` on node `n`.
+    *
+    * Blank type names are ignored. Exactly one remaining type is stored as TYPE_FULL_NAME; any other number of
+    * types (including none) is stored as DYNAMIC_TYPE_HINT_FULL_NAME.
+    */
+  protected def setTypes(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, n: StoredNode, types: Seq[String]): Unit =
+    types.filter(_.nonEmpty) match {
+      case Seq(singleType) => builder.setNodeProperty(n, PropertyNames.TYPE_FULL_NAME, singleType)
+      case candidates      => builder.setNodeProperty(n, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, candidates)
+    }
+}
diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala
new file mode 100644
index 000000000000..9e6bd11cd869
--- /dev/null
+++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/PhpTypeRecovery.scala
@@ -0,0 +1,285 @@
+package io.joern.php2cpg.passes
+
+import io.joern.x2cpg.Defines
+import io.joern.x2cpg.passes.frontend._
+import io.shiftleft.codepropertygraph.Cpg
+import io.shiftleft.codepropertygraph.generated.nodes._
+import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames, DispatchTypes}
+import io.shiftleft.semanticcpg.language._
+import io.shiftleft.semanticcpg.language.operatorextension.OpNodes
+import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.{Assignment, FieldAccess}
+import overflowdb.BatchedUpdate.DiffGraphBuilder
+
+import scala.annotation.tailrec
+import scala.collection.mutable
+
+class PhpTypeRecoveryPass(cpg: Cpg, config: XTypeRecoveryConfig = XTypeRecoveryConfig(iterations = 3))
+    extends XTypeRecoveryPass[NamespaceBlock](cpg, config) {
+
+  override protected def generateRecoveryPass(state: XTypeRecoveryState): XTypeRecovery[NamespaceBlock] =
+    new PhpTypeRecovery(cpg, state)
+}
+
+private class PhpTypeRecovery(cpg: Cpg, state: XTypeRecoveryState) extends XTypeRecovery[NamespaceBlock](cpg, state) {
+
+  override def compilationUnit: Iterator[NamespaceBlock] = cpg.file.namespaceBlock.iterator
+
+  override def 
generateRecoveryForCompilationUnitTask( + unit: NamespaceBlock, + builder: DiffGraphBuilder + ): RecoverForXCompilationUnit[NamespaceBlock] = { + val newConfig = state.config.copy(enabledDummyTypes = state.isFinalIteration && state.config.enabledDummyTypes) + new RecoverForPhpFile(cpg, unit, builder, state.copy(config = newConfig)) + } +} + +private class RecoverForPhpFile(cpg: Cpg, cu: NamespaceBlock, builder: DiffGraphBuilder, state: XTypeRecoveryState) + extends RecoverForXCompilationUnit[NamespaceBlock](cpg, cu, builder, state) { + override protected def prepopulateSymbolTableEntry(x: AstNode): Unit = x match { + case x: Call => + x.methodFullName match { + case Operators.alloc => + case _ => symbolTable.append(x, (x.methodFullName +: x.dynamicTypeHintFullName).toSet) + } + case _ => super.prepopulateSymbolTableEntry(x) + } + + protected val methodTypesTable = mutable.Map[Method, mutable.HashSet[String]]() + + override def isConstructor(c: Call): Boolean = + isConstructor(c.name) && c.code.endsWith(")") + + override protected def isConstructor(name: String): Boolean = + !name.isBlank && name.charAt(0).isUpper + + override def assignments: Iterator[Assignment] = + cu.ast.isCall.nameExact(Operators.assignment).map(new OpNodes.Assignment(_)) + + protected def unresolvedDynamicCalls: Iterator[Call] = cu.ast.isCall + .filter(_.dispatchType == DispatchTypes.DYNAMIC_DISPATCH) + .filter(_.methodFullName.startsWith(Defines.UnresolvedNamespace)) + + /* Register post-processing pass that executes in the super class */ + override protected def postSetTypeInformation(): Unit = { + unresolvedDynamicCalls.foreach(visitUnresolvedDynamicCall) + } + override protected def visitIdentifierAssignedToConstructor(i: Identifier, c: Call): Set[String] = { + val constructorPaths = symbolTable.get(c).map(_.stripSuffix(s"${pathSep}")) + associateTypes(i, constructorPaths) + } + + override protected def visitIdentifierAssignedToCallRetVal(i: Identifier, c: Call): Set[String] = { + + if 
(symbolTable.contains(c)) { + val callReturns = methodReturnValues(symbolTable.get(c).toSeq) + associateTypes(i, callReturns) + } else if (c.argument.exists(_.argumentIndex == 0)) { + val callFullNames = (c.argument(0) match { + case i: Identifier if symbolTable.contains(LocalVar(i.name)) => symbolTable.get(LocalVar(i.name)) + case i: Identifier if symbolTable.contains(CallAlias(i.name)) => symbolTable.get(CallAlias(i.name)) + case _ => Set.empty + }).map(_.concat(s"$pathSep${c.name}")).toSeq + val callReturns = methodReturnValues(callFullNames) + associateTypes(i, callReturns) + } else { + /* CPG may already contain type info for this method (globally, outside of compilation) + * unit. If not, use dummy return value. + */ + val rs = methodReturnValues(Seq(c.methodFullName)) + if (rs.isEmpty) associateTypes(i, Set(s"${c.name}$pathSep${XTypeRecovery.DummyReturnType}")) + else associateTypes(i, rs) + } + } + + override protected def visitReturns(ret: Return): Unit = { + /* A bug in XTypeRecovery mishandles functions that have multiple return + * statements. We add a new "symbol table" (methodTypesTable) for method + * return types as they get collected across the multiple return statements + * for a single function. 
+ */ + val m = ret.method + val existingTypes = mutable.HashSet.from( + (m.methodReturn.typeFullName +: m.methodReturn.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .filterNot(_.startsWith(Defines.UnresolvedNamespace)) + ) + existingTypes.addAll(methodTypesTable.getOrElse(m, mutable.HashSet())) + + @tailrec + def extractTypes(xs: List[CfgNode]): Set[String] = xs match { + case ::(head: Literal, Nil) if head.typeFullName != "ANY" => + Set(head.typeFullName) + case ::(head: Call, Nil) if head.name == Operators.fieldAccess => + val fieldAccess = new FieldAccess(head) + val (sym, ts) = getSymbolFromCall(fieldAccess) + val cpgTypes = cpg.typeDecl + .fullNameExact(ts.map(_.compUnitFullName).toSeq: _*) + .member + .nameExact(sym.identifier) + .flatMap(m => m.typeFullName +: m.dynamicTypeHintFullName) + .filterNot { x => x == "ANY" || x == "this" } + .toSet + if (cpgTypes.nonEmpty) cpgTypes + else symbolTable.get(sym) + case ::(head: Call, Nil) if symbolTable.contains(head) => + val callPaths = symbolTable.get(head) + val returnValues = methodReturnValues(callPaths.toSeq) + if (returnValues.isEmpty) + callPaths.map(c => s"$c$pathSep${XTypeRecovery.DummyReturnType}") + else + returnValues + case ::(head: Call, Nil) if head.argumentOut.headOption.exists(symbolTable.contains) => + symbolTable + .get(head.argumentOut.head) + .map(t => Seq(t, head.name, XTypeRecovery.DummyReturnType).mkString(pathSep.toString)) + case ::(identifier: Identifier, Nil) if symbolTable.contains(identifier) => + symbolTable.get(identifier) + case ::(head: Call, Nil) => + extractTypes(head.argument.l) + case _ => Set.empty + } + val returnTypes = extractTypes(ret.argumentOut.l) + existingTypes.addAll(returnTypes) + + /* Check whether method return is already known, and if so, remove dummy value */ + val saveTypes = existingTypes.filterNot(typeName => { + if (typeName.startsWith(Defines.UnresolvedNamespace)) + true + else if (typeName.endsWith(s"${XTypeRecovery.DummyReturnType}")) + 
typeName.split(pathSep).headOption match { + case Some(methodName) => { + val methodReturns = methodReturnValues(Seq(methodName)) + .filterNot(_.endsWith(s"${XTypeRecovery.DummyReturnType}")) + !methodReturns.isEmpty + } + case None => false + } + else + false + }) + methodTypesTable.update(m, saveTypes) + builder.setNodeProperty(ret.method.methodReturn, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, saveTypes) + } + + /* Necessary to change the filter regex from (this|self) to (\\$this|this), in order to account for $this PHP + * convention. + */ + override protected def associateTypes(symbol: LocalVar, fa: FieldAccess, types: Set[String]): Set[String] = { + fa.astChildren.filterNot(_.code.matches("(\\$this|this|self)")).headOption.collect { + case fi: FieldIdentifier => + getFieldParents(fa).foreach(t => persistMemberWithTypeDecl(t, fi.canonicalName, types)) + case i: Identifier if isField(i) => + getFieldParents(fa).foreach(t => persistMemberWithTypeDecl(t, i.name, types)) + } + symbolTable.append(symbol, types) + } + + /* Reference the PythonTypeRecovery implementation. The XTypeRecovery one seems incorrect. 
*/
+  /* Returns the full names of the scopes that may own the field accessed by `fa`:
+   *  - the enclosing method itself when the access happens in the file-level "<global>" method,
+   *  - otherwise the enclosing type declaration(s) plus the types they inherit from (minus any/object),
+   *  - otherwise whatever the parent implementation reports.
+   */
+  override protected def getFieldParents(fa: FieldAccess): Set[String] = {
+    // The literal had lost its angle-bracket content and compared against "", which no method name matches.
+    if (fa.method.name == "<global>") {
+      Set(fa.method.fullName)
+    } else if (fa.method.typeDecl.nonEmpty) {
+      val parentTypes       = fa.method.typeDecl.fullName.toSet
+      val baseTypeFullNames = cpg.typeDecl.fullNameExact(parentTypes.toSeq: _*).inheritsFromTypeFullName.toSet
+      (parentTypes ++ baseTypeFullNames).filterNot(_.matches("(?i)(any|object)"))
+    } else {
+      super.getFieldParents(fa)
+    }
+  }
+
+  /* Looks up the types recorded for call `c`. Case order matters: field accesses are resolved through their field
+   * name, then the symbol table is consulted, then index accesses, and finally the callee's return values.
+   */
+  override protected def getTypesFromCall(c: Call): Set[String] = c.name match {
+    case Operators.fieldAccess        => symbolTable.get(LocalVar(getFieldName(new FieldAccess(c))))
+    case _ if symbolTable.contains(c) => symbolTable.get(c)
+    case Operators.indexAccess        => getIndexAccessTypes(c)
+    case _                            => methodReturnValues(Seq(c.methodFullName))
+  }
+
+  /* Builds the symbol-table key for an index access `collection[index]`, or None (after a debug log entry) when the
+   * argument shapes are not one of the handled combinations.
+   */
+  override protected def indexAccessToCollectionVar(c: Call): Option[CollectionVar] = {
+    // Textual name of a call operand: field name, nested "collection[idx]" rendering, or the plain call name.
+    def callName(x: Call): String =
+      if (x.name == Operators.fieldAccess)
+        getFieldName(new FieldAccess(x))
+      else if (x.name == Operators.indexAccess)
+        indexAccessToCollectionVar(x)
+          .map(cv => s"${cv.identifier}[${cv.idx}]")
+          .getOrElse(XTypeRecovery.DummyIndexAccess)
+      else x.name
+
+    // Produce Some/None directly instead of wrapping a nullable result in Option(...).
+    c.argumentOut.l match {
+      case List(i: Identifier, idx: Literal)    => Some(CollectionVar(i.name, idx.code))
+      case List(i: Identifier, idx: Identifier) => Some(CollectionVar(i.name, idx.code))
+      case List(target: Call, idx: Call)        => Some(CollectionVar(callName(target), callName(idx)))
+      case List(target: Call, idx: Literal)     => Some(CollectionVar(callName(target), idx.code))
+      case List(target: Call, idx: Identifier)  => Some(CollectionVar(callName(target), idx.code))
+      case xs =>
+        logger.debug(s"Unhandled index access ${xs.map(x => (x.label, x.code)).mkString(",")} @ ${c.name}")
+        None
+    }
+  }
+  override protected def assignTypesToCall(x: Call, types: Set[String]): Set[String] = {
+    if (types.nonEmpty) {
+      getSymbolFromCall(x) match {
+        case (lhs, globalKeys) if globalKeys.nonEmpty => {
+          globalKeys.foreach { (fieldVar: 
FieldPath) => + persistMemberWithTypeDecl(fieldVar.compUnitFullName, fieldVar.identifier, types) + } + symbolTable.append(lhs, types) + } + case (lhs, _) => symbolTable.append(lhs, types) + } + } else Set.empty + } + + override protected def methodReturnValues(methodFullNames: Seq[String]): Set[String] = { + /* Look up methods in existing CPG */ + val rs = cpg.method + .fullNameExact(methodFullNames: _*) + .methodReturn + .flatMap(mr => mr.typeFullName +: mr.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .filterNot(_.endsWith("alloc.")) + .filterNot(_.endsWith(s"${XTypeRecovery.DummyReturnType}")) + .toSet + if (rs.isEmpty) + /* Return dummy return type if not found */ + methodFullNames + .flatMap(m => Set(m.concat(s"$pathSep${XTypeRecovery.DummyReturnType}"))) + .toSet + else rs + } + + /* If we know the type of the method's first parameter, use that to determine the method scope. + * + * TODO: Are there methods / instances where this doesn't work? Static methods? + * TODO: What if the first parameter could take multiple types? + * TODO: Test on nested dynamic calls, e.g. 
foo->bar->baz() + */ + protected def visitUnresolvedDynamicCall(c: Call): Unit = { + + if (c.argument.exists(_.argumentIndex == 0)) { + c.argument(0) match { + case p: Identifier => { + val ts = (p.typeFullName +: p.dynamicTypeHintFullName) + .filterNot(_ == "ANY") + .distinct + ts match { + case Seq() => + case Seq(t) => { + val newFullName = t + "->" + c.name + builder.setNodeProperty(c, PropertyNames.METHOD_FULL_NAME, newFullName) + builder.setNodeProperty( + c, + PropertyNames.TYPE_FULL_NAME, + s"${newFullName}$pathSep${XTypeRecovery.DummyReturnType}" + ) + builder.setNodeProperty(c, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, Seq.empty) + } + case _ => { /* TODO: case where multiple possible types are identified */ } + } + } + case _ => + } + } + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala new file mode 100644 index 000000000000..68866c288f36 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpTypeRecoveryPassTests.scala @@ -0,0 +1,425 @@ +package io.joern.php2cpg.passes + +import io.joern.php2cpg.testfixtures.PhpCode2CpgFixture +import io.shiftleft.semanticcpg.language._ + +class PhpTypeRecoveryPassTests extends PhpCode2CpgFixture() { + + /* TODO: Future tests to specify correct type recovery behaviors: + * - Method call inherited from a super class should be recovered + * - A type hint on a parameter should be sufficient to resolve method full names at calls + * - Parameter types on builtins with variadic parameters + */ + + "literals declared from built-in types" should { + lazy val cpg = code(""" + |attrA; + | } + |} + |function foo_instantiate_classA() { + | $a = new ClassA(); + | return $a; + |} + |""".stripMargin).cpg + + "recover type of class member assigned to literal" in { + val List(attrA) = 
cpg.typeDecl("ClassA").member.name("attrA").take(1).l + attrA.typeFullName shouldBe "int" + } + + "recover type of method that returns class member" in { + val List(barMethod) = cpg.typeDecl("ClassA").method.name("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + + "recover type of object that instantiates a class" in { + val List(aObject) = cpg.identifier("a").take(1).l + aObject.typeFullName shouldBe "ClassA" + } + + "recover type of function that returns object" in { + val List(fooMethod) = cpg.method("foo_instantiate_classA").take(1).l + fooMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("ClassA") + } + } + + "functions that return multiple objects" should { + lazy val cpg = code(""" + |foo; + | } + |} + |class ClassB { + | private $foo = 0; + | + | function baz() { + | return $this->foo; + | } + |} + | + |function foo_return_different_objects($type_param) { + | if ($type_param == 0) { + | $a = new ClassA(); + | } else { + | $a = new ClassB(); + | } + | return $a; + |} + |""".stripMargin).cpg + + "recover both possible types for local variable" in { + val List(aIdentifier) = cpg.identifier("a").take(1).l + aIdentifier.dynamicTypeHintFullName shouldBe Seq("ClassA", "ClassB") + } + + "recover both possible types for function return" in { + val List(fooMethod) = cpg.method("foo_return_different_objects").take(1).l + fooMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("ClassA", "ClassB") + } + } + + /* Joern's PHP front-end does not currently handle comments. This test is + * ignored, but should be revisited when comments are handled. 
+ */ + "functions with docblock type information" should { + lazy val cpg = code(""" + |foo = 1; + | } + | + | function get_foo() { + | return $this->foo; + | } + |} + |""".stripMargin).cpg + + "identify class member type from setter" in { + val List(fooMember) = cpg.typeDecl("ClassA").member.name("foo").take(1).l + fooMember.typeFullName shouldBe "int" + } + + "identify getter return type from class member" in { + val List(getterMethod) = cpg.method("get_foo").take(1).l + getterMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } + + "functions with multiple return statements with two different types" should { + lazy val cpg = code(""" + |.indexAccess").take(1).l + indexAccessCall.typeFullName shouldBe "int" + } + } + + "function declarations with type hints" should { + lazy val cpg = code(""" + |foo(); + | } + |} + | + |function baz() { + | $a = new ClassA(); + | return $a->foo(); + |} + """.stripMargin).cpg + + "be properly resolved when called with $this" in { + val List(fooCall) = cpg.method("bar").ast.isCall.take(1).l + fooCall.methodFullName shouldBe "ClassA->foo" + } + + "be properly resolved when called through class with known type" in { + val List(fooCall) = cpg.method("baz").ast.isCall.filter(_.code == "$a->foo()").take(1).l + fooCall.methodFullName shouldBe "ClassA->foo" + } + + "propagate type information to calling method" in { + val List(bazMethod) = cpg.method("baz").take(1).l + bazMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + + "propagate type information to calling method when called with $this" in { + val List(barMethod) = cpg.method("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } + + "modules that import modules" should { + lazy val cpg = code( + """ + |foo(); + |} + |""".stripMargin, + "useA.php" + ) + + "recover the type of object instantiated from imported module class" in { + val List(aIdentifier) = cpg.identifier("a").take(1).l + 
aIdentifier.typeFullName shouldBe "ClassA" + } + + "recover method return value assigned from class method" in { + val List(barMethod) = cpg.method("bar").take(1).l + barMethod.methodReturn.dynamicTypeHintFullName shouldBe Seq("int") + } + } +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala index 2bbbb3714d17..15a9c7f3ee50 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala @@ -3,6 +3,7 @@ package io.joern.php2cpg.testfixtures import io.joern.dataflowengineoss.queryengine.EngineContext import io.joern.php2cpg.{Config, Php2Cpg} import io.joern.x2cpg.testfixtures.{Code2CpgFixture, DefaultTestCpg, LanguageFrontend} +import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.semanticcpg.language.{ICallResolver, NoResolve} @@ -12,6 +13,7 @@ import io.joern.x2cpg.X2Cpg import io.shiftleft.semanticcpg.layers.LayerCreatorContext import io.joern.dataflowengineoss.layers.dataflows.OssDataFlowOptions import io.joern.dataflowengineoss.layers.dataflows.OssDataFlow +import io.joern.php2cpg.passes.PhpSetKnownTypesPass trait PhpFrontend extends LanguageFrontend { override val fileSuffix: String = ".php" @@ -31,8 +33,8 @@ class PhpTestCpg(runOssDataflow: Boolean) extends TestCpg with PhpFrontend { val options = new OssDataFlowOptions() new OssDataFlow(options).run(context) } + Php2Cpg.postProcessingPasses(this).foreach(_.createAndApply()) } - } class PhpCode2CpgFixture(runOssDataflow: Boolean = false)