Skip to content

Commit

Permalink
[pysrc2cpg] Module Reference Query Layer (#3824)
Browse files Browse the repository at this point in the history
This query package, `modulevariable`, adds the complexity necessary to reason about and handle the changes introduced by #3750

See the README.md for additional context.

One can now navigate the module members, the block references, and the locals and identifiers referencing these variables, and concisely navigate back again.
  • Loading branch information
DavidBakerEffendi authored Nov 14, 2023
1 parent ff5e780 commit dea6ab2
Show file tree
Hide file tree
Showing 11 changed files with 290 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package io.joern.pysrc2cpg.passes
import io.joern.pysrc2cpg.PySrc2CpgFixture
import io.joern.x2cpg.passes.frontend.ImportsPass.*
import io.joern.x2cpg.passes.frontend.{ImportsPass, XTypeHintCallLinker}
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Member}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.FieldAccess

import java.io.File
import scala.collection.immutable.Seq
Expand Down Expand Up @@ -1256,4 +1259,89 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) {
}
}

"Resolved module variable references" should {
// Three-file FastAPI fixture: `main.py` registers routers that are defined as module-level variables in
// `itemsrouter.py` and `usersrouter.py`.
val cpg = code(
  """from fastapi import FastAPI
    |import itemsrouter
    |import usersrouter
    |
    |app = FastAPI()
    |
    |app.include_router(
    | itemsrouter.router,
    | prefix="/items",
    | tags=["items"],
    | responses={404: {"description": "Not found"}},
    |)
    |app.include_router(
    | usersrouter.normal_router,
    | usersrouter.admin_router,
    | prefix="/users",
    | tags=["users"],
    | responses={404: {"description": "Not found"}},
    |)
    |""".stripMargin,
  "main.py"
)
  .moreCode(
    """
      |from fastapi import APIRouter
      |
      |router = APIRouter()
      |fake_items_db = {"gun": {"name": "Portal Gun"}}
      |
      |@router.get("/")
      |async def read_items():
      | return fake_items_db
      |""".stripMargin,
    "itemsrouter.py"
  )
  // Every decorator below is followed by the handler it decorates; a decorator followed directly by a
  // statement is not valid Python, so the `/admin` route gets its own `read_admins` handler.
  .moreCode(
    """
      |from fastapi import APIRouter
      |
      |normal_router = APIRouter()
      |admin_router = APIRouter()
      |fake_users_db = {"plumbus": {"name": "Plumbus"}}
      |fake_admins_db = {"flumbus": {"name": "Flumbus"}}
      |
      |@normal_router.get("/")
      |async def read_users():
      | return fake_users_db
      |
      |@admin_router.get("/admin")
      |async def read_admins():
      | return fake_admins_db
      |
      |""".stripMargin,
    "usersrouter.py"
  )

"enable traversing from a module variable, to its references, back to other module variable references" in {
  // `include_router` calls invoked on the FastAPI-typed module variable
  val includeRouterCalls = cpg.moduleVariables
    .where(_.typeFullName(".*FastAPI.*"))
    .invokingCalls
    .nameExact("include_router")
    .l
  // Arguments of those calls from index 1 upwards, i.e. everything after the receiver slot
  val routerArguments = includeRouterCalls.argument.argumentIndexGte(1).l
  // Field access -> referenced member -> module variable -> defining assignment
  val routerDefinitions =
    routerArguments.isCall.fieldAccess.referencedMember.moduleVariables.definitions.l
  // Sorted by assignment code, so the order is admin_router, normal_router, router
  val codeWithModule = routerDefinitions.map(d => (d.code, d.method.fullName)).sortBy(_._1).l
  val List(adminRouter, normalRouter, itemsRouter) = codeWithModule: @unchecked

  adminRouter shouldBe ("admin_router = APIRouter()", "usersrouter.py:<module>")
  normalRouter shouldBe ("normal_router = APIRouter()", "usersrouter.py:<module>")
  itemsRouter shouldBe ("router = APIRouter()", "itemsrouter.py:<module>")
}

"enable traversing from a module variable, to its referencing local" in {
  // These are the FastAPI-typed module variables themselves, not calls made on them
  val fastApiModuleVariables = cpg.moduleVariables.where(_.typeFullName(".*FastAPI.*")).l
  // Hop through the module variable reference blocks to the first local assigned from them
  val referencingLocal = fastApiModuleVariables.moduleVariableRefs.referencingLocals.head

  referencingLocal.name shouldBe "app"
  referencingLocal.method.fullName.head shouldBe "main.py:<module>"
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Member
import io.shiftleft.semanticcpg.language.modulevariable.NodeTypeStarters
import io.shiftleft.semanticcpg.language.modulevariable.nodemethods.{
ModuleVariableAsMemberMethods,
ModuleVariableReferenceMethods
}

import scala.language.implicitConversions

/** Implicit conversions that expose the module-variable query steps on the CPG itself, on single nodes, and on
  * traversals of those nodes.
  */
trait Implicits {

  // --- Starter step on the graph --------------------------------------------------------------------------------

  implicit def toNodeTypeStartersModuleVariableExtension(cpg: Cpg): NodeTypeStarters =
    new NodeTypeStarters(cpg)

  // --- Single-node extensions -----------------------------------------------------------------------------------

  implicit def toModuleVariableAsMemberExt(node: Member): ModuleVariableAsMemberMethods =
    new ModuleVariableAsMemberMethods(node)

  implicit def toModuleVariablesExt(node: OpNodes.ModuleVariableReference): ModuleVariableReferenceMethods =
    new ModuleVariableReferenceMethods(node)

  // --- Traversal extensions -------------------------------------------------------------------------------------

  implicit def toModuleVariableAsMemberTrav(steps: Iterator[Member]): ModuleVariableAsMemberTraversal =
    new ModuleVariableAsMemberTraversal(steps)

  implicit def toModuleVariablesTrav(steps: IterableOnce[OpNodes.ModuleVariable]): ModuleVariableTraversal =
    new ModuleVariableTraversal(steps.iterator)

  implicit def toModuleReferenceTrav(steps: IterableOnce[OpNodes.ModuleVariableReference]): ModuleReferenceTraversal =
    new ModuleReferenceTraversal(steps.iterator)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.Local
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment
import overflowdb.traversal.help.Doc

/** Steps available on a traversal of module variable reference blocks. */
class ModuleReferenceTraversal(traversal: Iterator[OpNodes.ModuleVariableReference]) extends AnyVal {

  @Doc(info = "The module variable that this references to")
  def moduleVariable: Iterator[OpNodes.ModuleVariable] =
    traversal.flatMap(reference => reference.moduleVariable)

  @Doc(info = "All assignments where the module reference in this traversal are the target")
  def definitions: Iterator[Assignment] = {
    // Materialise once: the references are walked twice, first for the names and then for the assignments
    val references      = traversal.toList
    val referencedNames = references.iterator.moduleVariable.name.distinct.toSeq
    val assignments     = references.iterator.inAssignment
    // Keep only assignments whose target identifier carries the name of one of the referenced variables
    assignments.where(_.target.isIdentifier.nameExact(referencedNames*)).dedup
  }

  @Doc(info = "All local variables concerning the module variable")
  def referencingLocals: Iterator[Local] =
    traversal.inAssignment.target.isIdentifier.refsTo.collectAll[Local]

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Call, Member}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.modulevariable.OpNodes.ModuleVariable
import overflowdb.traversal.help.Doc

/** Steps that narrow a traversal of members down to those that represent module variables. */
class ModuleVariableAsMemberTraversal(traversal: Iterator[Member]) extends AnyVal {

  /** Keeps each member for which some referencing node's parent block sits in an assignment whose target is an
    * identifier that (a) is named like one of the members in this traversal and (b) lives in a method named
    * `<module>`. The surviving members are wrapped as `ModuleVariable`.
    */
  @Doc(info = "Members representing module variables")
  def moduleVariables: Iterator[ModuleVariable] = {
    val sourceMembers = traversal.toList
    // Loop-invariant: computed once here instead of once per candidate identifier inside the filter below
    val memberNames = sourceMembers.name.toSeq
    sourceMembers
      .where(
        _.ref.parentBlock.inAssignment.target.isIdentifier
          .where(_.and(_.nameExact(memberNames*), _.method.nameExact("<module>")))
      )
      .map(new ModuleVariable(_))
      .iterator
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment
import overflowdb.traversal.help.Doc

/** Steps available on a traversal of module variables. */
class ModuleVariableTraversal(traversal: Iterator[OpNodes.ModuleVariable]) extends AnyVal {

  /** The parent blocks of the nodes referencing these module variables, wrapped as `ModuleVariableReference`. */
  @Doc(info = "All module references where the module variables in this traversal are the target")
  def moduleVariableRefs: Iterator[OpNodes.ModuleVariableReference] =
    traversal.ref.parentBlock.map(new OpNodes.ModuleVariableReference(_))

  /** Assignments that contain a module variable reference and whose target identifier is named like one of the
    * module variables in this traversal. Results are de-duplicated.
    */
  @Doc(info = "All assignments where the module variables in this traversal are the target")
  def definitions: Iterator[Assignment] = {
    val sourceMembers = traversal.toList
    // Loop-invariant: computed once here instead of once per visited assignment inside the filter below
    val variableNames = sourceMembers.name.toSeq
    sourceMembers.iterator.moduleVariableRefs.inAssignment
      .where(_.target.isIdentifier.nameExact(variableNames*))
      .dedup
  }

  /** Identifiers that refer to the same declaration as the target of one of the `definitions`. */
  @Doc(info = "All identifiers related to the module variables in this traversal")
  def referencingIdentifiers: Iterator[Identifier] = {
    definitions.target.isIdentifier.refsTo._refIn.collectAll[Identifier]
  }

  /** Calls in which a referencing identifier appears with an argument index of at most 1. Results are
    * de-duplicated.
    */
  @Doc(info = "Calls this module variable invokes")
  def invokingCalls: Iterator[Call] = {
    referencingIdentifiers.argumentIndexLte(1).inCall.dedup.iterator
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.Cpg
import overflowdb.traversal.help.{Doc, TraversalSource}
import io.shiftleft.semanticcpg.language.*

/** Starter steps for module variables, made available directly on the `Cpg`. */
@TraversalSource
class NodeTypeStarters(cpg: Cpg) {

  // Begins from every member in the graph and keeps those that qualify as module variables
  @Doc(info = "All module-level variables, e.g., variables declared at the root of a file in Python or JavaScript.")
  def moduleVariables: Iterator[OpNodes.ModuleVariable] = {
    val allMembers = cpg.member
    allMembers.moduleVariables
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Block, Member}

/** Thin, typed views over existing CPG nodes used by the module-variable query steps. Each wrapper is constructed
  * from the wrapped node's own graph and id.
  */
object OpNodes {

  /** Represents a module-level global variable. This kind of node behaves like both a local variable and a field
    * access and is common in languages such as Python/JavaScript.
    *
    * @param node
    *   the member being viewed as a module variable.
    */
  class ModuleVariable(node: Member) extends Member(node.graph(), node.id)

  /** Represents a module variable access block, which is a lowered view of operations performed on both the local
    * variable and the member-level access.
    *
    * @param node
    *   the block being viewed as a module variable reference.
    * @see
    *   <a href="https://github.com/joernio/joern/pull/3750/">[pysrc2cpg] Model Field-like Behaviour of Module
    *   Variables</a>
    */
  class ModuleVariableReference(node: Block) extends Block(node.graph(), node.id)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Module Variables

This package adds the complexity necessary to reason about and handle the changes introduced by:
[\[pysrc2cpg\] Model Field-like Behaviour of Module Variables](https://github.com/joernio/joern/pull/3750/)

There is likely a much better schema-level way of addressing this hybrid behaviour of module-level variables, but this
approach is the least intrusive and does not require extensive up-front planning. Thus, we leave the following note:

TODO: Replace with a far-reaching, but simpler solution across frontends.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package io.shiftleft.semanticcpg.language.modulevariable.nodemethods

import io.shiftleft.codepropertygraph.generated.nodes.Member
import io.shiftleft.semanticcpg.language.*
import overflowdb.traversal.help.Doc

/** Node-level helpers for members that may represent module variables. */
class ModuleVariableAsMemberMethods(node: Member) extends AnyVal {

  @Doc(info = "If this member refers to a module variable")
  def isModuleVariable: Boolean = {
    // Identifiers that are the assignment target of a block referencing this member
    val assignmentTargets = Iterator(node).ref.parentBlock.inAssignment.target.isIdentifier
    // A match must carry the member's own name and sit in a method named `<module>`
    val moduleLevelTargets =
      assignmentTargets.where(_.and(_.nameExact(node.name), _.method.nameExact("<module>")))
    moduleLevelTargets.nonEmpty
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package io.shiftleft.semanticcpg.language.modulevariable.nodemethods

import io.shiftleft.semanticcpg.language.modulevariable.OpNodes
import io.shiftleft.semanticcpg.language.*
import overflowdb.traversal.help.Doc

/** Node-level helpers for a single module variable reference block. */
class ModuleVariableReferenceMethods(node: OpNodes.ModuleVariableReference) extends AnyVal {

  @Doc(info = "The module variable being referenced")
  def moduleVariable: Iterator[OpNodes.ModuleVariable] = {
    // Only the first AST child of the block is inspected; it must be a field-access call to yield a result
    val firstChild = node.astChildren.headOption
    firstChild.isCall.fieldAccess.referencedMember.map(member => new OpNodes.ModuleVariable(member))
  }

}
Original file line number Diff line number Diff line change
@@ -1,32 +1,36 @@
package io.shiftleft.semanticcpg

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.traversal.NodeTraversalImplicits
import io.shiftleft.semanticcpg.language.bindingextension.{
MethodTraversal => BindingMethodTraversal,
TypeDeclTraversal => BindingTypeDeclTraversal
MethodTraversal as BindingMethodTraversal,
TypeDeclTraversal as BindingTypeDeclTraversal
}
import io.shiftleft.semanticcpg.language.callgraphextension.{CallTraversal, MethodTraversal}
import io.shiftleft.semanticcpg.language.dotextension.{AstNodeDot, CfgNodeDot, InterproceduralNodeDot}
import io.shiftleft.semanticcpg.language.nodemethods._
import io.shiftleft.semanticcpg.language.nodemethods.*
import io.shiftleft.semanticcpg.language.types.expressions.generalizations.{
AstNodeTraversal,
CfgNodeTraversal,
DeclarationTraversal,
ExpressionTraversal
}
import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal => OriginalCall, _}
import io.shiftleft.semanticcpg.language.types.propertyaccessors._
import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal => OriginalMethod, _}
import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal as OriginalCall, *}
import io.shiftleft.semanticcpg.language.types.propertyaccessors.*
import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal as OriginalMethod, *}
import overflowdb.NodeOrDetachedNode

/** Language for traversing the code property graph
*
* Implicit conversions to specific steps, based on the node at hand. Automatically in scope when using anything in the
* `steps` package, e.g. `Steps`
*/
package object language extends operatorextension.Implicits with LowPrioImplicits with NodeTraversalImplicits {
package object language
extends operatorextension.Implicits
with modulevariable.Implicits
with LowPrioImplicits
with NodeTraversalImplicits {
// Implicit conversions from generated node types. We use these to add methods
// to generated node types.

Expand Down

0 comments on commit dea6ab2

Please sign in to comment.