Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pysrc2cpg] Module Reference Query Layer #3824

Merged
merged 2 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package io.joern.pysrc2cpg.passes
import io.joern.pysrc2cpg.PySrc2CpgFixture
import io.joern.x2cpg.passes.frontend.ImportsPass.*
import io.joern.x2cpg.passes.frontend.{ImportsPass, XTypeHintCallLinker}
import io.shiftleft.codepropertygraph.generated.Operators
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Member}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.FieldAccess

import java.io.File
import scala.collection.immutable.Seq
Expand Down Expand Up @@ -1256,4 +1259,89 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) {
}
}

// Fixture: a three-file FastAPI project. `main.py` registers routers that are declared as module-level variables in
// `itemsrouter.py` and `usersrouter.py`, so resolving them requires following references across modules.
"Resolved module variable references" should {
  val cpg = code(
    """from fastapi import FastAPI
      |import itemsrouter
      |import usersrouter
      |
      |app = FastAPI()
      |
      |app.include_router(
      | itemsrouter.router,
      | prefix="/items",
      | tags=["items"],
      | responses={404: {"description": "Not found"}},
      |)
      |app.include_router(
      | usersrouter.normal_router,
      | usersrouter.admin_router,
      | prefix="/users",
      | tags=["users"],
      | responses={404: {"description": "Not found"}},
      |)
      |""".stripMargin,
    "main.py"
  )
    .moreCode(
      """
        |from fastapi import APIRouter
        |
        |router = APIRouter()
        |fake_items_db = {"gun": {"name": "Portal Gun"}}
        |
        |@router.get("/")
        |async def read_items():
        | return fake_items_db
        |""".stripMargin,
      "itemsrouter.py"
    )
    .moreCode(
      // The decorated admin handler needs its own `async def` header; a decorator followed directly by an indented
      // `return` is not valid Python.
      """
        |from fastapi import APIRouter
        |
        |normal_router = APIRouter()
        |admin_router = APIRouter()
        |fake_users_db = {"plumbus": {"name": "Plumbus"}}
        |fake_admins_db = {"flumbus": {"name": "Flumbus"}}
        |
        |@normal_router.get("/")
        |async def read_users():
        | return fake_users_db
        |
        |@admin_router.get("/admin")
        |async def read_admins():
        | return fake_admins_db
        |
        |""".stripMargin,
      "usersrouter.py"
    )

  "enable traversing from a module variable, to its references, back to other module variable references" in {
    // Start at the `app` module variable and collect every `include_router` call made on it.
    val appIncludeRouterCalls =
      cpg.moduleVariables
        .where(_.typeFullName(".*FastAPI.*"))
        .invokingCalls
        .nameExact("include_router")
        .l
    // Arguments from index 1 upwards, i.e. everything except argument 0.
    val includedRouters = appIncludeRouterCalls.argument.argumentIndexGte(1).l
    // Field accesses such as `itemsrouter.router` lead to members, which lead to the defining assignments.
    val definitionsOfRouters = includedRouters.isCall.fieldAccess.referencedMember.moduleVariables.definitions.l
    // Sorted by code so the destructuring order is deterministic.
    val List(adminRouter, normalRouter, itemsRouter) =
      definitionsOfRouters.map(x => (x.code, x.method.fullName)).sortBy(_._1).l: @unchecked

    adminRouter shouldBe ("admin_router = APIRouter()", "usersrouter.py:<module>")
    normalRouter shouldBe ("normal_router = APIRouter()", "usersrouter.py:<module>")
    itemsRouter shouldBe ("router = APIRouter()", "itemsrouter.py:<module>")
  }

  "enable traversing from a module variable, to its referencing local" in {
    // These are module variables (not calls) whose type matches FastAPI, i.e. `app`.
    val fastApiModuleVariables =
      cpg.moduleVariables
        .where(_.typeFullName(".*FastAPI.*"))
        .l
    val appLocal = fastApiModuleVariables.moduleVariableRefs.referencingLocals.head
    appLocal.name shouldBe "app"
    appLocal.method.fullName.head shouldBe "main.py:<module>"
  }
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Member
import io.shiftleft.semanticcpg.language.modulevariable.NodeTypeStarters
import io.shiftleft.semanticcpg.language.modulevariable.nodemethods.{
ModuleVariableAsMemberMethods,
ModuleVariableReferenceMethods
}

import scala.language.implicitConversions

/** Implicit conversions that attach the module-variable query steps to the graph, to single nodes, and to traversals.
  * Mix this trait into a package object to bring all of them into scope at once.
  */
trait Implicits {

  // ---- Graph-level entry point -------------------------------------------------------------------------------------

  /** Wraps a [[Cpg]] so that the module-variable starter steps can be called on it. */
  implicit def toNodeTypeStartersModuleVariableExtension(cpg: Cpg): NodeTypeStarters =
    NodeTypeStarters(cpg)

  // ---- Single-node extensions --------------------------------------------------------------------------------------

  /** Adds the module-variable helper methods to a single member node. */
  implicit def toModuleVariableAsMemberExt(node: Member): ModuleVariableAsMemberMethods =
    ModuleVariableAsMemberMethods(node)

  /** Adds the module-variable-reference helper methods to a single reference block. */
  implicit def toModuleVariablesExt(node: OpNodes.ModuleVariableReference): ModuleVariableReferenceMethods =
    ModuleVariableReferenceMethods(node)

  // ---- Traversal extensions ----------------------------------------------------------------------------------------

  /** Adds the module-variable steps to a traversal of member nodes. */
  implicit def toModuleVariableAsMemberTrav(steps: Iterator[Member]): ModuleVariableAsMemberTraversal =
    ModuleVariableAsMemberTraversal(steps)

  /** Adds the module-variable steps to any collection or traversal of module variables. */
  implicit def toModuleVariablesTrav(steps: IterableOnce[OpNodes.ModuleVariable]): ModuleVariableTraversal =
    ModuleVariableTraversal(steps.iterator)

  /** Adds the module-reference steps to any collection or traversal of module variable references. */
  implicit def toModuleReferenceTrav(steps: IterableOnce[OpNodes.ModuleVariableReference]): ModuleReferenceTraversal =
    ModuleReferenceTraversal(steps.iterator)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.Local
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment
import overflowdb.traversal.help.Doc

/** Steps that can be applied to a traversal of module variable reference blocks.
  *
  * @param traversal
  *   the reference blocks to start from
  */
class ModuleReferenceTraversal(traversal: Iterator[OpNodes.ModuleVariableReference]) extends AnyVal {

  @Doc(info = "The module variable that this references to")
  def moduleVariable: Iterator[OpNodes.ModuleVariable] =
    for {
      reference <- traversal
      variable  <- reference.moduleVariable
    } yield variable

  @Doc(info = "All assignments where the module reference in this traversal are the target")
  def definitions: Iterator[Assignment] = {
    // Materialised up front because the references are walked twice: once for the names, once for the assignments.
    val references    = traversal.toList
    val variableNames = references.iterator.moduleVariable.name.distinct.toSeq
    val assignments   = references.iterator.inAssignment
    // Keep only assignments whose target identifier carries one of the referenced variables' names.
    assignments.where(_.target.isIdentifier.nameExact(variableNames*)).dedup
  }

  @Doc(info = "All local variables concerning the module variable")
  def referencingLocals: Iterator[Local] = {
    val assignedIdentifiers = traversal.inAssignment.target.isIdentifier
    assignedIdentifiers.refsTo.collectAll[Local]
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Call, Member}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.modulevariable.OpNodes.ModuleVariable
import overflowdb.traversal.help.Doc

/** Steps that narrow a traversal of member nodes down to those representing module variables.
  *
  * @param traversal
  *   the members to inspect
  */
class ModuleVariableAsMemberTraversal(traversal: Iterator[Member]) extends AnyVal {

  /** Keeps the members that are referenced from a block assigned to an identifier which (a) is named like one of the
    * incoming members and (b) lives in a method named `<module>`, and wraps each survivor as a [[ModuleVariable]].
    *
    * @return
    *   the members of this traversal that represent module variables
    */
  @Doc(info = "Members representing module variables")
  def moduleVariables: Iterator[ModuleVariable] = {
    // Materialised because the members are needed twice: for their names and as the source of the filter below.
    val sourceMembers = traversal.toList
    // Computed once here; evaluating it inside the nested lambdas would redo the work for every candidate identifier.
    val memberNames = sourceMembers.name.toSeq
    sourceMembers
      .where(
        _.ref.parentBlock.inAssignment.target.isIdentifier
          .where(_.and(_.nameExact(memberNames*), _.method.nameExact("<module>")))
      )
      .map(new ModuleVariable(_))
      .iterator
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier}
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment
import overflowdb.traversal.help.Doc

/** Steps that can be applied to a traversal of module variables.
  *
  * @param traversal
  *   the module variables to start from
  */
class ModuleVariableTraversal(traversal: Iterator[OpNodes.ModuleVariable]) extends AnyVal {

  /** Follows each module variable to the blocks that reference it and wraps those blocks as references. */
  @Doc(info = "All module references where the module variables in this traversal are the target")
  def moduleVariableRefs: Iterator[OpNodes.ModuleVariableReference] =
    traversal.ref.parentBlock.map(new OpNodes.ModuleVariableReference(_))

  /** Assignments that contain a reference to one of these module variables and whose target identifier is named like
    * one of them. Results are de-duplicated.
    */
  @Doc(info = "All assignments where the module variables in this traversal are the target")
  def definitions: Iterator[Assignment] = {
    // Materialised because the variables are needed twice: for their names and as the start of the traversal.
    val sourceMembers = traversal.toList
    // Computed once here; evaluating it inside the filter lambda would redo the work for every assignment.
    val memberNames = sourceMembers.name.toSeq
    sourceMembers.iterator.moduleVariableRefs.inAssignment
      .where(_.target.isIdentifier.nameExact(memberNames*))
      .dedup
  }

  /** Identifiers that refer to the same declaration as the target of any of the [[definitions]]. */
  @Doc(info = "All identifiers related to the module variables in this traversal")
  def referencingIdentifiers: Iterator[Identifier] = {
    definitions.target.isIdentifier.refsTo._refIn.collectAll[Identifier]
  }

  /** Calls in which one of the [[referencingIdentifiers]] appears at argument index 0 or 1. Results are de-duplicated.
    */
  @Doc(info = "Calls this module variable invokes")
  def invokingCalls: Iterator[Call] = {
    referencingIdentifiers.argumentIndexLte(1).inCall.dedup.iterator
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.Cpg
import overflowdb.traversal.help.{Doc, TraversalSource}
import io.shiftleft.semanticcpg.language.*

/** Entry points into module-variable queries, rooted at a code property graph.
  *
  * @param cpg
  *   the graph whose members are searched
  */
@TraversalSource
class NodeTypeStarters(cpg: Cpg) {

  @Doc(info = "All module-level variables, e.g., variables declared at the root of a file in Python or JavaScript.")
  def moduleVariables: Iterator[OpNodes.ModuleVariable] = {
    // Every member in the graph is a candidate; the `moduleVariables` step does the filtering.
    val allMembers = cpg.member
    allMembers.moduleVariables
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package io.shiftleft.semanticcpg.language.modulevariable

import io.shiftleft.codepropertygraph.generated.nodes.{Block, Member}

/** Thin wrapper node types used by the module-variable query layer. Each wrapper is constructed from an existing node
  * and passes that node's graph and id to its superclass constructor.
  */
object OpNodes {

  /** Represents a module-level global variable. This kind of node behaves like both a local variable and a field access
    * and is common in languages such as Python/JavaScript.
    *
    * @param node
    *   the member node being wrapped; its graph and id are passed on to the `Member` superclass
    */
  class ModuleVariable(node: Member) extends Member(node.graph(), node.id)

  /** Represents a module variable access block, which represents a lowered view of operations taken on both the local
    * variable and member-level access.
    *
    * @param node
    *   the block node being wrapped; its graph and id are passed on to the `Block` superclass
    * @see
    *   <a href="https://github.com/joernio/joern/pull/3750/">[pysrc2cpg] Model Field-like Behaviour of Module
    *   Variables</a>
    */
  class ModuleVariableReference(node: Block) extends Block(node.graph(), node.id)

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Module Variables

This package adds the complexity necessary to reason about and handle the changes introduced by:
[\[pysrc2cpg\] Model Field-like Behaviour of Module Variables](https://github.com/joernio/joern/pull/3750/)

There is likely a much better schema-level way of addressing this hybrid behaviour of module-level variables, but this
approach is the least intrusive and does not require a large amount of up-front planning. Thus, we leave the following note:

TODO: Replace with a far-reaching, but simpler solution across frontends.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package io.shiftleft.semanticcpg.language.modulevariable.nodemethods

import io.shiftleft.codepropertygraph.generated.nodes.Member
import io.shiftleft.semanticcpg.language.*
import overflowdb.traversal.help.Doc

/** Helper methods for inspecting a single member node as a potential module variable.
  *
  * @param node
  *   the member to inspect
  */
class ModuleVariableAsMemberMethods(node: Member) extends AnyVal {

  @Doc(info = "If this member refers to a module variable")
  def isModuleVariable: Boolean = {
    // Identifiers that are assigned a block referencing this member, share the member's name, and sit inside a method
    // named `<module>`. The member counts as a module variable when at least one such identifier exists.
    val moduleLevelTargets =
      Iterator(node).ref.parentBlock.inAssignment.target.isIdentifier
        .nameExact(node.name)
        .where(_.method.nameExact("<module>"))
    moduleLevelTargets.nonEmpty
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package io.shiftleft.semanticcpg.language.modulevariable.nodemethods

import io.shiftleft.semanticcpg.language.modulevariable.OpNodes
import io.shiftleft.semanticcpg.language.*
import overflowdb.traversal.help.Doc

/** Helper methods available on a single module variable reference block.
  *
  * @param node
  *   the reference block to inspect
  */
class ModuleVariableReferenceMethods(node: OpNodes.ModuleVariableReference) extends AnyVal {

  @Doc(info = "The module variable being referenced")
  def moduleVariable: Iterator[OpNodes.ModuleVariable] = {
    // Only the first AST child of the block is considered, and only if it is a field-access call.
    val firstChild        = node.astChildren.headOption
    val referencedMembers = firstChild.isCall.fieldAccess.referencedMember
    referencedMembers.map(member => new OpNodes.ModuleVariable(member))
  }

}
Original file line number Diff line number Diff line change
@@ -1,32 +1,36 @@
package io.shiftleft.semanticcpg

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes._
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.traversal.NodeTraversalImplicits
import io.shiftleft.semanticcpg.language.bindingextension.{
MethodTraversal => BindingMethodTraversal,
TypeDeclTraversal => BindingTypeDeclTraversal
MethodTraversal as BindingMethodTraversal,
TypeDeclTraversal as BindingTypeDeclTraversal
}
import io.shiftleft.semanticcpg.language.callgraphextension.{CallTraversal, MethodTraversal}
import io.shiftleft.semanticcpg.language.dotextension.{AstNodeDot, CfgNodeDot, InterproceduralNodeDot}
import io.shiftleft.semanticcpg.language.nodemethods._
import io.shiftleft.semanticcpg.language.nodemethods.*
import io.shiftleft.semanticcpg.language.types.expressions.generalizations.{
AstNodeTraversal,
CfgNodeTraversal,
DeclarationTraversal,
ExpressionTraversal
}
import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal => OriginalCall, _}
import io.shiftleft.semanticcpg.language.types.propertyaccessors._
import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal => OriginalMethod, _}
import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal as OriginalCall, *}
import io.shiftleft.semanticcpg.language.types.propertyaccessors.*
import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal as OriginalMethod, *}
import overflowdb.NodeOrDetachedNode

/** Language for traversing the code property graph
*
* Implicit conversions to specific steps, based on the node at hand. Automatically in scope when using anything in the
* `steps` package, e.g. `Steps`
*/
package object language extends operatorextension.Implicits with LowPrioImplicits with NodeTraversalImplicits {
package object language
extends operatorextension.Implicits
with modulevariable.Implicits
with LowPrioImplicits
with NodeTraversalImplicits {
// Implicit conversions from generated node types. We use these to add methods
// to generated node types.

Expand Down
Loading