Skip to content

Commit

Permalink
[php2cpg] Php type recovery (#3723)
Browse files Browse the repository at this point in the history
* Create PHP Type Recovery pass

* Remove erroneous <init> types from symbolTable

* Implement builtins type recovery lookup

* Update PhpTypeRecovery pass to lookup in CPG

* Remove overrides of methods that are meant to be private

* Run scalafmt

* Create initial test file for PhpTypeRecoveryPass

* Add PhpTypeRecoveryPassTests

* Add PhpSetKnownTypes pass for builtin PHP functions

Pass will set the types for builtin functions with known function
signatures. This pass will run before the PhpTypeRecovery pass.

Currently, the pass does not handle variadic parameters.

* Minor update to tests for SetKnownTypes pass

* Fix formatting with scalafmt

* WIP: Refactor PhpSetKnownTypes pass

* WIP: Refactor PhpSetKnownTypes pass to be cleaner

Currently the method return types are set correctly, but need to still
update the parameter types.

* Refactor PhpSetKnownTypesPass as a ForkJoinParallelCpgPass

* Ignore tests that depend on context sensitivity and comments

* Ignore more context sensitive tests

* Remove multi-line debug statements

* Remove builtins table from PhpTypeRecoveryPass

* Fix how multiple return statements are handled

This bug comes from the XTypeRecovery pass, but is fixed in
PhpTypeRecovery by overriding the visitReturns method. If this approach
(using another "symbol table" for methods) is sufficient, it should also
be fixed in the abstract parent class.

* Fix typo in PhpTypeRecoveryPassTests

* Filter out dummy return values when saving types

* Refactor the dummy type removal to be less coarse

* Resolve unknown namespaces with new type info

* Ignore array field type recovery tests for now

* Run scalafmt

* Fix MatchError in resolving dynamic calls

* Implement Php2Cpg.postProcessingPasses for deduplication

This defines the list of default PHP postProcessingPasses in one
location, so that they can all be applied without code duplication
between the different places where passes are applied, like
PhpCpgGenerator and PhpCode2CpgFixture.

Additionally, the XTypeRecoveryConfig options are now exposed to the
frontend command line arguments for PHP analysis as a result of this
refactor.

* Refactor PhpSetKnownTypes to use idiomatic Scala matches

* Replace hardcoded string with defined value

* Remove excessive debug statements and format comments

* Run test:scalafmt

* Address minor comments

- Changed string equality comparisons from equals() to ==
- Changed "<module>" to "<global>" as top-level method namespace
- Removed errant debug statement
  • Loading branch information
wunused authored Nov 15, 2023
1 parent cb2a6ef commit 66089b8
Show file tree
Hide file tree
Showing 9 changed files with 885 additions and 7 deletions.
1 change: 1 addition & 0 deletions console/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependsOn(
Projects.macros,
Projects.javasrc2cpg,
Projects.jssrc2cpg,
Projects.php2cpg,
Projects.pysrc2cpg,
Projects.rubysrc2cpg,
Projects.x2cpg % "compile->compile;test->test"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
package io.joern.console.cpgcreation

import io.joern.console.FrontendConfig
import io.joern.php2cpg.{Config, Frontend, Php2Cpg}
import io.joern.x2cpg.X2Cpg
import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig
import io.shiftleft.codepropertygraph.Cpg

import java.nio.file.Path
import scala.util.Try

/** CPG generator for PHP: invokes the `php2cpg` launcher found under `rootPath` and afterwards applies the PHP
  * post-processing passes to the resulting CPG.
  *
  * @param config
  *   frontend configuration; its `cmdLineParams` are appended to the launcher arguments.
  * @param rootPath
  *   directory the `php2cpg` / `php2cpg.bat` launcher is resolved against.
  */
case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGenerator {
  // Launcher script: the `.bat` variant is selected when `isWin` is true.
  private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg")

  // Set by `generate` so that `applyPostProcessingPasses` can reuse the options of the frontend run.
  // Remains `None` until `generate` has been called.
  private var phpConfig: Option[Config] = None

  /** Parses the effective command line into `phpConfig` and runs the frontend launcher.
    *
    * @return
    *   `outputPath` wrapped in the `Try` produced by `runShellCommand`.
    */
  override def generate(inputPath: String, outputPath: String): Try[String] = {
    val arguments = List(inputPath) ++ Seq("-o", outputPath) ++ config.cmdLineParams
    phpConfig = X2Cpg.parseCommandLine(arguments.toArray, Frontend.cmdLineParser, Config())
    runShellCommand(command.toString, arguments).map(_ => outputPath)
  }

  /** The generator is considered available when the launcher file exists on disk. */
  override def isAvailable: Boolean =
    command.toFile.exists

  override def isJvmBased = true

  /** Creates and applies every pass returned by `Php2Cpg.postProcessingPasses`, then returns the same `cpg`. */
  override def applyPostProcessingPasses(cpg: Cpg): Cpg = {
    Php2Cpg.postProcessingPasses(cpg, phpConfig).foreach(_.createAndApply())
    cpg
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// function name; r1, r2; p1_t1, p1_t2; p2_t1; ...
add_post_meta; int, bool; int; string; mixed; bool
apply_filters; mixed; string; mixed; mixed
array_map; array; callable, null; array; array; array
array_merge; array; array; array; array
array_walk_recursive; bool; array, object; callable; mixed
base64_decode; string; string; bool
base64_encode; string; string
count; int; array, countable; int
current; mixed; array, object
do_action; ; string; mixed;
echo; void; string
empty; bool; mixed
explode; array; string; string; int
floatval; float; mixed
in_array; bool; mixed; array; bool
intval; int; mixed
is_array; bool; mixed
is_bool; bool; mixed
is_double; bool; mixed
is_float; bool; mixed
is_int; bool; mixed
is_integer; bool; mixed
is_iterable; bool; mixed
is_long; bool; mixed
is_null; bool; mixed
is_numeric; bool; mixed
is_object; bool; mixed
is_real; bool; mixed
is_resource; bool; mixed
is_scalar; bool; mixed
is_string; bool; mixed
isset; bool; mixed; array; bool
list; array; mixed; mixed; mixed; mixed
maybe_unserialize; mixed; string
number_format; string; float; int; string, null; string, null
preg_match; int, bool; string; string; array; int; int
preg_match_all; int, bool; string; string; array; int; int
preg_replace; string, array, null; string, array; string, array; string, array; int; int
printf; int; string; mixed; mixed; mixed; mixed
rawurldecode; string; string
rtrim; string; string; string
selected; string; mixed; mixed; bool
serialize; string; mixed
sort; bool; array; int
sprintf; string; string; mixed
strip_tags; string; string; array, string, null
strpos; int, bool; string; string; int
strtolower; string; string
strtotime; int, bool; string; int, null
substr; string; string; int; int, null
trim; string; string; string
unserialize; mixed; string; array
urldecode; string; string
var_dump; ; mixed; mixed
wp_json_encode; string,bool; mixed; int; int
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package io.joern.php2cpg

import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery}
import io.joern.php2cpg.Frontend._
import scopt.OParser

/** Command line configuration parameters
*/
final case class Config(phpIni: Option[String] = None, phpParserBin: Option[String] = None)
extends X2CpgConfig[Config] {
extends X2CpgConfig[Config]
with TypeRecoveryParserConfig[Config] {
def withPhpIni(phpIni: String): Config = {
copy(phpIni = Some(phpIni)).withInheritedFields(this)
}
Expand All @@ -17,7 +19,7 @@ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[Stri
}
}

private object Frontend {
object Frontend {

implicit val defaultConfig: Config = Config()

Expand All @@ -31,7 +33,8 @@ private object Frontend {
.text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."),
opt[String]("php-parser-bin")
.action((x, c) => c.withPhpParserBin(x))
.text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern.")
.text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern."),
XTypeRecovery.parserOptions
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
package io.joern.php2cpg

import io.joern.php2cpg.parser.PhpParser
import io.joern.php2cpg.passes.{AnyTypePass, AstCreationPass, AstParentInfoPass, ClosureRefPass, LocalCreationPass}
import io.joern.php2cpg.passes.{
AnyTypePass,
AstCreationPass,
AstParentInfoPass,
ClosureRefPass,
LocalCreationPass,
PhpSetKnownTypesPass,
PhpTypeRecoveryPass
}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.X2CpgFrontend
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig}
import io.joern.x2cpg.utils.ExternalCommand
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.CpgPassBase
import io.shiftleft.codepropertygraph.generated.Languages
import org.slf4j.LoggerFactory

Expand Down Expand Up @@ -68,3 +77,13 @@ class Php2Cpg extends X2CpgFrontend[Config] {

}
}

object Php2Cpg {

  /** Builds the default list of PHP post-processing passes for `cpg`, in the order they are meant to be applied:
    * first the known-types pass, then the type recovery pass.
    *
    * @param cpg
    *   the graph the passes are constructed for.
    * @param config
    *   optional frontend configuration; when present its `typePropagationIterations` and `disableDummyTypes`
    *   settings feed the type recovery configuration, otherwise a configuration with 3 iterations is used.
    * @return
    *   the passes, not yet applied.
    */
  def postProcessingPasses(cpg: Cpg, config: Option[Config] = None): List[CpgPassBase] = {
    val recoveryConfig = config.fold(XTypeRecoveryConfig(iterations = 3)) { c =>
      XTypeRecoveryConfig(c.typePropagationIterations, !c.disableDummyTypes)
    }
    val knownTypesPass   = new PhpSetKnownTypesPass(cpg)
    val typeRecoveryPass = new PhpTypeRecoveryPass(cpg, recoveryConfig)
    List(knownTypesPass, typeRecoveryPass)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package io.joern.php2cpg.passes

import better.files.File
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.PropertyNames
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate

import scala.io.Source
import java.io.{File => JFile}

/** One parsed row of the known function signatures file.
  *
  * @param name
  *   the function name (first column of the row).
  * @param rTypes
  *   return types. A function has at most one return value, but that value may have one or more types.
  * @param pTypes
  *   parameter types, where index 0 is the parameter at P0. A function may have several parameters, each with one
  *   or more types.
  */
case class KnownFunction(name: String, rTypes: Seq[String] = Nil, pTypes: Seq[Seq[String]] = Nil)

/** Sets the return and parameter types for builtin functions with known function signatures.
  *
  * Signatures are read from `knownTypesFile` when one is given, otherwise from the
  * `known_function_signatures.txt` resource. Lines starting with `//` are skipped. Every other row has the form
  * `name; r1, r2; p1_t1, p1_t2; p2_t1; ...`, i.e. columns are separated by `;` and the alternative types within
  * a column by `,`.
  *
  * TODO: Need to handle variadic arguments.
  */
class PhpSetKnownTypesPass(cpg: Cpg, knownTypesFile: Option[JFile] = None)
    extends ForkJoinParallelCpgPass[KnownFunction](cpg) {

  private val logger = LoggerFactory.getLogger(getClass)

  /** Parses the signatures file and returns one [[KnownFunction]] per usable row. */
  override def generateParts(): Array[KnownFunction] = {
    import scala.util.Using

    val source = knownTypesFile match {
      case Some(file) => Source.fromFile(file)
      case None       => Source.fromResource("known_function_signatures.txt")
    }
    // `Using.resource` closes the source even if reading or parsing a row throws.
    // `toArray` forces the lazy line iterator while the source is still open.
    Using.resource(source) { src =>
      src
        .getLines()
        .filterNot(_.startsWith("//"))
        .flatMap(createKnownFunctionFromLine)
        .toArray
    }
  }

  /** For every method whose full name equals `part.name`, records the known return types on its method return node
    * and the known parameter types on its parameters. Parameters and type columns are paired positionally; surplus
    * entries on either side are ignored by `zip`.
    */
  override def runOnPart(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, part: KnownFunction): Unit = {
    /* calculate the result of this part - this is done as a concurrent task */
    cpg.method.fullNameExact(part.name).l.foreach { mNode =>
      setTypes(builder, mNode.methodReturn, part.rTypes)
      // NOTE(review): relies on `parameter` yielding parameters in declaration order - confirm.
      mNode.parameter.l.zip(part.pTypes).foreach { case (param, paramTypes) =>
        setTypes(builder, param, paramTypes)
      }
    }
  }

  /** Parses a single row of the signatures file.
    *
    * @param line
    *   a raw line of the form `name; returnTypes; param0Types; param1Types; ...`.
    * @return
    *   `None` when the line has no function name (blank line, or nothing but separators), otherwise the parsed
    *   function. Missing columns yield empty type lists.
    */
  def createKnownFunctionFromLine(line: String): Option[KnownFunction] =
    // Note: `String.split` drops trailing empty columns, so a trailing `;` is harmless.
    line.split(";").map(_.strip).toList match {
      case Nil                      => None
      case "" :: _                  => None
      case name :: Nil              => Some(KnownFunction(name))
      case name :: rTypes :: pTypes => Some(KnownFunction(name, scanReturnTypes(rTypes), scanParamTypes(pTypes)))
    }

  /* From comma separated list of types, create list of types. Blank entries are dropped, so an empty column
   * yields an empty list rather than a list holding the empty string. */
  def scanReturnTypes(rTypesRaw: String): Seq[String] = splitTypes(rTypesRaw)

  /* From a semicolon separated list of parameters, each with a comma separated list of types,
   * create a list of lists of types. An empty parameter column is kept as an empty inner list so that the
   * positions of the following parameters are preserved. */
  def scanParamTypes(pTypesRawArr: List[String]): Seq[Seq[String]] =
    pTypesRawArr.map(splitTypes)

  /* Splits one column on `,`, trims every entry and discards the blank ones. */
  private def splitTypes(raw: String): Seq[String] =
    raw.split(",").map(_.strip).filter(_.nonEmpty).toSeq

  /** Records `types` on node `n`: a single type is stored as the node's type full name, several types are stored
    * as dynamic type hints. When no type is known the node is left untouched.
    */
  protected def setTypes(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, n: StoredNode, types: Seq[String]): Unit =
    types match {
      case Seq()         => // nothing known for this node
      case Seq(single)   => builder.setNodeProperty(n, PropertyNames.TYPE_FULL_NAME, single)
      case multipleTypes => builder.setNodeProperty(n, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, multipleTypes)
    }
}
Loading

0 comments on commit 66089b8

Please sign in to comment.