Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[php2cpg] Php type recovery #3723

Merged
merged 35 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6672fee
Create PHP Type Recovery pass
wunused Sep 18, 2023
7713eeb
Remove erroneous <init> types from symbolTable
wunused Sep 18, 2023
2db1e3c
Implement builtins type recovery lookup
wunused Sep 22, 2023
98b74af
Update PhpTypeRecovery pass to lookup in CPG
wunused Oct 6, 2023
cdb0a93
Remove overrides of methods that are meant to be private
wunused Oct 6, 2023
8a91dbc
Run scalafmt
wunused Oct 11, 2023
2d3ca8b
Create initial test file for PhpTypeRecoveryPass
wunused Oct 12, 2023
2e0eb6b
Add PhpTypeRecoveryPassTests
wunused Oct 13, 2023
7a364ed
Add PhpSetKnownTypes pass for builtin PHP functions
wunused Oct 15, 2023
4b5a999
Minor update to tests for SetKnownTypes pass
wunused Oct 16, 2023
0e7035c
Fix formatting with scalafmt
wunused Oct 16, 2023
ce032cd
WIP: Refactor PhpSetKnownTypes pass
wunused Oct 18, 2023
e9f73de
Merge branch 'master' into php-type-recovery
wunused Oct 18, 2023
f042555
WIP: Refactor PhpSetKnownTypes pass to be cleaner
wunused Oct 20, 2023
2d35ca5
Refactor PhpSetKnownTypesPass as a ForkJoinParallelCpgPass
wunused Oct 20, 2023
9891dfb
Ignore tests that depend on context sensitivity and comments
wunused Oct 20, 2023
ed01edf
Ignore more context sensitive tests
wunused Oct 24, 2023
dba5738
Remove multi-line debug statements
wunused Oct 24, 2023
afab2aa
Remove builtins table from PhpTypeRecoveryPass
wunused Oct 24, 2023
f1e3826
Fix how multiple return statements are handled
wunused Oct 24, 2023
e831118
Fix typo in PhpTypeRecoveryPassTests
wunused Nov 2, 2023
67e26b3
Filter out dummy return values when saving types
wunused Nov 2, 2023
b1cfe82
Refactor the dummy type removal to be less coarse
wunused Nov 2, 2023
96395b5
Resolve unknown namespaces with new type info
wunused Nov 7, 2023
572cf4c
Ignore array field type recovery tests for now
wunused Nov 7, 2023
f1d3ac3
Merge branch 'master' into php-type-recovery
wunused Nov 8, 2023
f4ec343
Run scalafmt
wunused Nov 8, 2023
f3dd991
Fix MatchError in resolving dynamic calls
wunused Nov 8, 2023
676acc4
Implement Php2Cpg.postProcessingPasses for deduplication
wunused Nov 13, 2023
b2dc5b2
Refactor PhpSetKnownTypes to use idiomatic Scala matches
wunused Nov 13, 2023
7c7d988
Replace hardcoded string with defined value
wunused Nov 13, 2023
8b29208
Merge branch 'master' into php-type-recovery
wunused Nov 14, 2023
335a14c
Remove excessive debug statements and format comments
wunused Nov 15, 2023
4061821
Run test:scalafmt
wunused Nov 15, 2023
7019a12
Address minor comments
wunused Nov 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions console/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependsOn(
Projects.macros,
Projects.javasrc2cpg,
Projects.jssrc2cpg,
Projects.php2cpg,
Projects.pysrc2cpg,
Projects.rubysrc2cpg,
Projects.x2cpg % "compile->compile;test->test"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
package io.joern.console.cpgcreation

import io.joern.console.FrontendConfig
import io.joern.php2cpg.{Config, Frontend, Php2Cpg}
import io.joern.x2cpg.X2Cpg
import io.joern.x2cpg.passes.frontend.XTypeRecoveryConfig
import io.shiftleft.codepropertygraph.Cpg

import java.nio.file.Path
import scala.util.Try

/** CPG generator that drives the `php2cpg` launcher script found under `rootPath`.
  *
  * @param config
  *   frontend configuration; its `cmdLineParams` are appended to the launcher arguments.
  * @param rootPath
  *   directory against which the launcher script name is resolved.
  */
case class PhpCpgGenerator(config: FrontendConfig, rootPath: Path) extends CpgGenerator {
// Launcher path: `php2cpg.bat` when `isWin` holds, `php2cpg` otherwise.
// NOTE(review): this declaration appears twice in a row — presumably the removed/added pair of a
// whitespace-only diff hunk; only one copy should remain in the real file. TODO confirm.
private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg")
private lazy val command: Path = if (isWin) rootPath.resolve("php2cpg.bat") else rootPath.resolve("php2cpg")
// Result of parsing the arguments of the most recent `generate` call; stays `None` until
// `generate` has run. Read later by `applyPostProcessingPasses`.
private var phpConfig: Option[Config] = None

/** Runs the launcher on `inputPath`, asking it to write to `outputPath`.
  *
  * The argument list (`inputPath`, `-o outputPath`, then `config.cmdLineParams`) is also parsed
  * with the php2cpg command line parser and the result is stored in `phpConfig`.
  *
  * @return
  *   the result of the shell command mapped to `outputPath`.
  */
override def generate(inputPath: String, outputPath: String): Try[String] = {
val arguments = List(inputPath) ++ Seq("-o", outputPath) ++ config.cmdLineParams
// Keep the parsed config so the post-processing passes can be set up from the same options.
phpConfig = X2Cpg.parseCommandLine(arguments.toArray, Frontend.cmdLineParser, Config())
runShellCommand(command.toString, arguments).map(_ => outputPath)
}

/** True when the launcher script exists on disk. */
override def isAvailable: Boolean =
command.toFile.exists

override def isJvmBased = true

/** Creates and applies every pass returned by `Php2Cpg.postProcessingPasses` for the stored
  * `phpConfig`, then returns the same `cpg` instance.
  */
override def applyPostProcessingPasses(cpg: Cpg): Cpg = {
Php2Cpg.postProcessingPasses(cpg, phpConfig).foreach(_.createAndApply())
cpg
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// function name; r1, r2; p1_t1, p1_t2; p2_t1; ...
add_post_meta; int, bool; int; string; mixed; bool
apply_filters; mixed; string; mixed; mixed
array_map; array; callable, null; array; array; array
array_merge; array; array; array; array
array_walk_recursive; bool; array, object; callable; mixed
base64_decode; string; string; bool
base64_encode; string; string
count; int; array, countable; int
current; mixed; array, object
do_action; ; string; mixed;
echo; void; string
empty; bool; mixed
explode; array; string; string; int
floatval; float; mixed
in_array; bool; mixed; array; bool
intval; int; mixed
is_array; bool; mixed
is_bool; bool; mixed
is_double; bool; mixed
is_float; bool; mixed
is_int; bool; mixed
is_integer; bool; mixed
is_iterable; bool; mixed
is_long; bool; mixed
is_null; bool; mixed
is_numeric; bool; mixed
is_object; bool; mixed
is_real; bool; mixed
is_resource; bool; mixed
is_scalar; bool; mixed
is_string; bool; mixed
isset; bool; mixed; array; bool
list; array; mixed; mixed; mixed; mixed
maybe_unserialize; mixed; string
number_format; string; float; int; string, null; string, null
preg_match; int, bool; string; string; array; int; int
preg_match_all; int, bool; string; string; array; int; int
preg_replace; string, array, null; string, array; string, array; string, array; int; int
printf; int; string; mixed; mixed; mixed; mixed
rawurldecode; string; string
rtrim; string; string; string
selected; string; mixed; mixed; bool
serialize; string; mixed
sort; bool; array; int
sprintf; string; string; mixed
strip_tags; string; string; array, string, null
strpos; int, bool; string; string; int
strtolower; string; string
strtotime; int, bool; string; int, null
substr; string; string; int; int, null
trim; string; string; string
unserialize; mixed; string; array
urldecode; string; string
var_dump; ; mixed; mixed
wp_json_encode; string, bool; mixed; int; int
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package io.joern.php2cpg

import io.joern.x2cpg.{X2CpgConfig, X2CpgMain}
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery}
import io.joern.php2cpg.Frontend._
import scopt.OParser

/** Command line configuration parameters
*/
final case class Config(phpIni: Option[String] = None, phpParserBin: Option[String] = None)
extends X2CpgConfig[Config] {
extends X2CpgConfig[Config]
with TypeRecoveryParserConfig[Config] {
def withPhpIni(phpIni: String): Config = {
copy(phpIni = Some(phpIni)).withInheritedFields(this)
}
Expand All @@ -17,7 +19,7 @@ final case class Config(phpIni: Option[String] = None, phpParserBin: Option[Stri
}
}

private object Frontend {
object Frontend {

implicit val defaultConfig: Config = Config()

Expand All @@ -31,7 +33,8 @@ private object Frontend {
.text("php.ini path used by php-parser. Defaults to php.ini shipped with Joern."),
opt[String]("php-parser-bin")
.action((x, c) => c.withPhpParserBin(x))
.text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern.")
.text("path to php-parser.phar binary. Defaults to php-parser shipped with Joern."),
XTypeRecovery.parserOptions
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
package io.joern.php2cpg

import io.joern.php2cpg.parser.PhpParser
import io.joern.php2cpg.passes.{AnyTypePass, AstCreationPass, AstParentInfoPass, ClosureRefPass, LocalCreationPass}
import io.joern.php2cpg.passes.{
AnyTypePass,
AstCreationPass,
AstParentInfoPass,
ClosureRefPass,
LocalCreationPass,
PhpSetKnownTypesPass,
PhpTypeRecoveryPass
}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.X2CpgFrontend
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig}
import io.joern.x2cpg.utils.ExternalCommand
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.CpgPassBase
import io.shiftleft.codepropertygraph.generated.Languages
import org.slf4j.LoggerFactory

Expand Down Expand Up @@ -68,3 +77,13 @@ class Php2Cpg extends X2CpgFrontend[Config] {

}
}

object Php2Cpg {

  /** Builds the post-processing passes for a PHP CPG, in the order they are meant to be applied:
    * first the known-types pass, then the type recovery pass.
    *
    * @param cpg
    *   the graph the passes operate on.
    * @param config
    *   optional frontend configuration. When present, its `typePropagationIterations` and the
    *   negation of its `disableDummyTypes` flag configure type recovery; when absent, type recovery
    *   is configured with 3 iterations.
    * @return
    *   the passes, not yet applied.
    */
  def postProcessingPasses(cpg: Cpg, config: Option[Config] = None): List[CpgPassBase] = {
    val recoveryConfig = config match {
      case Some(c) => XTypeRecoveryConfig(c.typePropagationIterations, !c.disableDummyTypes)
      case None    => XTypeRecoveryConfig(iterations = 3)
    }
    val knownTypesPass   = new PhpSetKnownTypesPass(cpg)
    val typeRecoveryPass = new PhpTypeRecoveryPass(cpg, recoveryConfig)
    List(knownTypesPass, typeRecoveryPass)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package io.joern.php2cpg.passes

import better.files.File
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ForkJoinParallelCpgPass
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.PropertyNames
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes
import org.slf4j.{Logger, LoggerFactory}
import overflowdb.BatchedUpdate

import scala.io.Source
import java.io.{File => JFile}

/** One parsed row of the known function signatures file.
  *
  * @param name
  *   name of the function the row describes.
  * @param rTypes
  *   return types. A function has at most one return value, but that value may have one or more
  *   types.
  * @param pTypes
  *   parameter types; index 0 holds the types of the first parameter. A function may have several
  *   parameters, each with one or more types.
  */
case class KnownFunction(name: String, rTypes: Seq[String] = Nil, pTypes: Seq[Seq[String]] = Nil)

/** Sets the return and parameter types for builtin functions with known function signatures.
  *
  * Signatures are read from `knownTypesFile` when one is given, otherwise from the
  * `known_function_signatures.txt` resource. Every row has the shape
  * `name; r1, r2; p1_t1, p1_t2; p2_t1; ...`: columns are separated by `;`, alternative types within
  * a column by `,`. Rows starting with `//` are comments. A column may be left empty when nothing
  * is known about it (e.g. `do_action; ; string; mixed;`); such a column contributes no types.
  *
  * TODO: Need to handle variadic arguments.
  *
  * @param cpg
  *   the graph whose method nodes are annotated.
  * @param knownTypesFile
  *   optional replacement for the bundled signatures resource.
  */
class PhpSetKnownTypesPass(cpg: Cpg, knownTypesFile: Option[JFile] = None)
    extends ForkJoinParallelCpgPass[KnownFunction](cpg) {

  private val logger = LoggerFactory.getLogger(getClass)

  /** Parses the signatures file and returns one [[KnownFunction]] per usable row. Blank rows,
    * comment rows and rows without a function name are dropped.
    */
  override def generateParts(): Array[KnownFunction] = {
    val source = knownTypesFile match {
      case Some(file) => Source.fromFile(file)
      case _          => Source.fromResource("known_function_signatures.txt")
    }
    // `getLines()` is lazy, so the rows must be materialised before the source is closed; the
    // `finally` makes sure the handle is released even when reading or parsing fails.
    try {
      source
        .getLines()
        .map(_.strip)
        .filterNot(row => row.isEmpty || row.startsWith("//"))
        .flatMap(createKnownFunctionFromLine)
        .toArray
    } finally source.close()
  }

  /** Applies one known signature to every method whose full name equals `part.name`: the method
    * return receives `part.rTypes`, and the parameters receive `part.pTypes` pairwise.
    */
  override def runOnPart(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, part: KnownFunction): Unit = {
    cpg.method.fullNameExact(part.name).l.foreach { mNode =>
      setTypes(builder, mNode.methodReturn, part.rTypes)
      // `zip` stops at the shorter side, so surplus parameters or surplus signature columns are
      // ignored. NOTE(review): this relies on the traversal yielding parameters in declaration
      // order — TODO confirm.
      mNode.parameter.l.zip(part.pTypes).foreach { case (p, pTypes) => setTypes(builder, p, pTypes) }
    }
  }

  /** Parses a single row of the signatures file.
    *
    * @return
    *   `None` for a row that carries no function name (blank row, or only separators), otherwise
    *   the parsed signature.
    */
  def createKnownFunctionFromLine(line: String): Option[KnownFunction] =
    line.split(";").map(_.strip).toList match {
      case Nil                      => None
      case name :: _ if name.isEmpty => None
      case name :: Nil              => Some(KnownFunction(name))
      case name :: rTypes :: Nil    => Some(KnownFunction(name, scanReturnTypes(rTypes)))
      case name :: rTypes :: pTypes => Some(KnownFunction(name, scanReturnTypes(rTypes), scanParamTypes(pTypes)))
    }

  /* From comma separated list of types, create list of types. An empty column yields no types. */
  def scanReturnTypes(rTypesRaw: String): Seq[String] = splitTypes(rTypesRaw)

  /* From the per-parameter columns, each a comma separated list of types, create a list of lists of
   * types. Positions are preserved: a parameter with an empty column keeps its slot with no types. */
  def scanParamTypes(pTypesRawArr: List[String]): Seq[Seq[String]] =
    pTypesRawArr.map(splitTypes)

  /* Splits one column on commas, trims each entry and drops blank entries, so that an empty column
   * does not turn into the bogus type name "". */
  private def splitTypes(raw: String): Seq[String] =
    raw.split(",").map(_.strip).filter(_.nonEmpty).toSeq

  /** Records `types` on `n`: exactly one type is written to `TYPE_FULL_NAME`, several types are
    * written to `DYNAMIC_TYPE_HINT_FULL_NAME`, and no types leaves the node untouched.
    */
  protected def setTypes(builder: overflowdb.BatchedUpdate.DiffGraphBuilder, n: StoredNode, types: Seq[String]): Unit =
    types match {
      case Seq()       => // nothing is known for this node; do not overwrite what is already there
      case Seq(single) => builder.setNodeProperty(n, PropertyNames.TYPE_FULL_NAME, single)
      case several     => builder.setNodeProperty(n, PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, several)
    }
}
Loading
Loading