diff --git a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala
index 1f495764f219..9fa5a14d6ca9 100644
--- a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala
+++ b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala
@@ -3,7 +3,10 @@ package io.joern.pysrc2cpg.passes
 import io.joern.pysrc2cpg.PySrc2CpgFixture
 import io.joern.x2cpg.passes.frontend.ImportsPass.*
 import io.joern.x2cpg.passes.frontend.{ImportsPass, XTypeHintCallLinker}
+import io.shiftleft.codepropertygraph.generated.Operators
+import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier, Member}
 import io.shiftleft.semanticcpg.language.*
+import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.FieldAccess
 
 import java.io.File
 import scala.collection.immutable.Seq
@@ -1256,4 +1259,92 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) {
     }
   }
 
+  "Resolved module variable references" should {
+    val cpg = code(
+      """from fastapi import FastAPI
+        |import itemsrouter
+        |import usersrouter
+        |
+        |app = FastAPI()
+        |
+        |app.include_router(
+        |    itemsrouter.router,
+        |    prefix="/items",
+        |    tags=["items"],
+        |    responses={404: {"description": "Not found"}},
+        |)
+        |app.include_router(
+        |    usersrouter.normal_router,
+        |    usersrouter.admin_router,
+        |    prefix="/users",
+        |    tags=["users"],
+        |    responses={404: {"description": "Not found"}},
+        |)
+        |""".stripMargin,
+      "main.py"
+    )
+      .moreCode(
+        """
+          |from fastapi import APIRouter
+          |
+          |router = APIRouter()
+          |fake_items_db = {"gun": {"name": "Portal Gun"}}
+          |
+          |@router.get("/")
+          |async def read_items():
+          |    return fake_items_db
+          |""".stripMargin,
+        "itemsrouter.py"
+      )
+      .moreCode(
+        """
+          |from fastapi import APIRouter
+          |
+          |normal_router = APIRouter()
+          |admin_router = APIRouter()
+          |fake_users_db = {"plumbus": {"name": "Plumbus"}}
+          |fake_admins_db = {"flumbus": {"name": "Flumbus"}}
+          |
+          |@normal_router.get("/")
+          |async def read_users():
+          |    return fake_users_db
+          |
+          |@admin_router.get("/admin")
+          |async def read_admins():
+          |    return fake_admins_db
+          |
+          |""".stripMargin,
+        "usersrouter.py"
+      )
+
+    "enable traversing from a module variable, to its references, back to other module variable references" in {
+      val appIncludeRouterCalls =
+        cpg.moduleVariables
+          .where(_.typeFullName(".*FastAPI.*"))
+          .invokingCalls
+          .nameExact("include_router")
+          .l
+      val includedRouters      = appIncludeRouterCalls.argument.argumentIndexGte(1).l
+      val definitionsOfRouters = includedRouters.isCall.fieldAccess.referencedMember.moduleVariables.definitions.l
+      // Sorted by the assignment code, hence the order: admin_router, normal_router, router.
+      val List(adminRouter, normalRouter, itemsRouter) =
+        definitionsOfRouters.map(x => (x.code, x.method.fullName)).sortBy(_._1).l: @unchecked
+
+      adminRouter shouldBe ("admin_router = APIRouter()", "usersrouter.py:<module>")
+      normalRouter shouldBe ("normal_router = APIRouter()", "usersrouter.py:<module>")
+      itemsRouter shouldBe ("router = APIRouter()", "itemsrouter.py:<module>")
+    }
+
+    "enable traversing from a module variable, to its referencing local" in {
+      val appModuleVariables =
+        cpg.moduleVariables
+          .where(_.typeFullName(".*FastAPI.*"))
+          .l
+      // The local backing `app` is expected to live in the module-level method of main.py.
+      val appLocal = appModuleVariables.moduleVariableRefs.referencingLocals.head
+      appLocal.name shouldBe "app"
+      appLocal.method.fullName.head shouldBe "main.py:<module>"
+    }
+  }
+
 }
diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/Implicits.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/Implicits.scala
new file mode 100644
index 000000000000..1a012d60b25c
--- /dev/null
+++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/Implicits.scala
@@ -0,0 +1,32 @@
+package io.shiftleft.semanticcpg.language.modulevariable
+
+import io.shiftleft.codepropertygraph.Cpg
+import
io.shiftleft.codepropertygraph.generated.nodes.Member +import io.shiftleft.semanticcpg.language.modulevariable.NodeTypeStarters +import io.shiftleft.semanticcpg.language.modulevariable.nodemethods.{ + ModuleVariableAsMemberMethods, + ModuleVariableReferenceMethods +} + +import scala.language.implicitConversions + +trait Implicits { + + implicit def toNodeTypeStartersModuleVariableExtension(cpg: Cpg): NodeTypeStarters = new NodeTypeStarters(cpg) + + implicit def toModuleVariableAsMemberExt(node: Member): ModuleVariableAsMemberMethods = + new ModuleVariableAsMemberMethods(node) + + implicit def toModuleVariableAsMemberTrav(steps: Iterator[Member]): ModuleVariableAsMemberTraversal = + new ModuleVariableAsMemberTraversal(steps) + + implicit def toModuleVariablesExt(node: OpNodes.ModuleVariableReference): ModuleVariableReferenceMethods = + new ModuleVariableReferenceMethods(node) + + implicit def toModuleVariablesTrav(steps: IterableOnce[OpNodes.ModuleVariable]): ModuleVariableTraversal = + new ModuleVariableTraversal(steps.iterator) + + implicit def toModuleReferenceTrav(steps: IterableOnce[OpNodes.ModuleVariableReference]): ModuleReferenceTraversal = + new ModuleReferenceTraversal(steps.iterator) + +} diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleReferenceTraversal.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleReferenceTraversal.scala new file mode 100644 index 000000000000..eb894fbf6418 --- /dev/null +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleReferenceTraversal.scala @@ -0,0 +1,28 @@ +package io.shiftleft.semanticcpg.language.modulevariable + +import io.shiftleft.codepropertygraph.generated.nodes.Local +import io.shiftleft.semanticcpg.language.* +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment +import overflowdb.traversal.help.Doc + +class ModuleReferenceTraversal(traversal: 
Iterator[OpNodes.ModuleVariableReference]) extends AnyVal {
+
+  @Doc(info = "The module variable that this references to")
+  def moduleVariable: Iterator[OpNodes.ModuleVariable] =
+    traversal.flatMap(_.moduleVariable)
+
+  @Doc(info = "All assignments where the module reference in this traversal are the target")
+  def definitions: Iterator[Assignment] = {
+    val varRefs = traversal.toList
+    val moduleVarNames = varRefs.iterator.moduleVariable.name.distinct.toSeq
+    varRefs.iterator.inAssignment
+      .where(_.target.isIdentifier.nameExact(moduleVarNames*))
+      .dedup
+  }
+
+  @Doc(info = "All local variables concerning the module variable")
+  def referencingLocals: Iterator[Local] = {
+    traversal.inAssignment.target.isIdentifier.refsTo.collectAll[Local]
+  }
+
+}
diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableAsMemberTraversal.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableAsMemberTraversal.scala
new file mode 100644
index 000000000000..914a5dde9931
--- /dev/null
+++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableAsMemberTraversal.scala
@@ -0,0 +1,26 @@
+package io.shiftleft.semanticcpg.language.modulevariable
+
+import io.shiftleft.codepropertygraph.generated.nodes.{Call, Member}
+import io.shiftleft.semanticcpg.language.*
+import io.shiftleft.semanticcpg.language.modulevariable.OpNodes.ModuleVariable
+import overflowdb.traversal.help.Doc
+
+/** Traversal steps over [[Member]] nodes that select the members acting as module variables.
+  */
+class ModuleVariableAsMemberTraversal(traversal: Iterator[Member]) extends AnyVal {
+
+  @Doc(info = "Members representing module variables")
+  def moduleVariables: Iterator[ModuleVariable] = {
+    val sourceMembers = traversal.toList
+    sourceMembers
+      .where(
+        // Keep a member only if `ref.parentBlock.inAssignment` leads to a target identifier that carries the name of
+        // one of the incoming members and whose enclosing method is named `<module>`.
+        _.ref.parentBlock.inAssignment.target.isIdentifier
+          .where(_.and(_.nameExact(sourceMembers.name.toSeq*), _.method.nameExact("<module>")))
+      )
+      .map(new ModuleVariable(_))
+      .iterator
+  }
+
+}
diff --git
a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableTraversal.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableTraversal.scala new file mode 100644 index 000000000000..cd8121efd569 --- /dev/null +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/ModuleVariableTraversal.scala @@ -0,0 +1,32 @@ +package io.shiftleft.semanticcpg.language.modulevariable + +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Identifier} +import io.shiftleft.semanticcpg.language.* +import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.Assignment +import overflowdb.traversal.help.Doc + +class ModuleVariableTraversal(traversal: Iterator[OpNodes.ModuleVariable]) extends AnyVal { + + @Doc(info = "All module references where the module variables in this traversal are the target") + def moduleVariableRefs: Iterator[OpNodes.ModuleVariableReference] = + traversal.ref.parentBlock.map(new OpNodes.ModuleVariableReference(_)) + + @Doc(info = "All assignments where the module variables in this traversal are the target") + def definitions: Iterator[Assignment] = { + val sourceMembers = traversal.toList + sourceMembers.iterator.moduleVariableRefs.inAssignment + .where(_.target.isIdentifier.nameExact(sourceMembers.name.toSeq*)) + .dedup + } + + @Doc(info = "All identifiers related to the module variables in this traversal") + def referencingIdentifiers: Iterator[Identifier] = { + definitions.target.isIdentifier.refsTo._refIn.collectAll[Identifier] + } + + @Doc(info = "Calls this module variable invokes") + def invokingCalls: Iterator[Call] = { + referencingIdentifiers.argumentIndexLte(1).inCall.dedup.iterator + } + +} diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/NodeTypeStarters.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/NodeTypeStarters.scala new file mode 100644 index 
000000000000..8ec569d055f0 --- /dev/null +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/NodeTypeStarters.scala @@ -0,0 +1,14 @@ +package io.shiftleft.semanticcpg.language.modulevariable + +import io.shiftleft.codepropertygraph.Cpg +import overflowdb.traversal.help.{Doc, TraversalSource} +import io.shiftleft.semanticcpg.language.* + +@TraversalSource +class NodeTypeStarters(cpg: Cpg) { + + @Doc(info = "All module-level variables, e.g., variables declared at the root of a file in Python or JavaScript.") + def moduleVariables: Iterator[OpNodes.ModuleVariable] = + cpg.member.moduleVariables + +} diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/OpNodes.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/OpNodes.scala new file mode 100644 index 000000000000..5433b110d94d --- /dev/null +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/OpNodes.scala @@ -0,0 +1,21 @@ +package io.shiftleft.semanticcpg.language.modulevariable + +import io.shiftleft.codepropertygraph.generated.nodes.{Block, Member} + +object OpNodes { + + /** Represents a module-level global variable. This kind of node behaves like both a local variable and a field access + * and is common in languages such as Python/JavaScript. + */ + class ModuleVariable(node: Member) extends Member(node.graph(), node.id) + + /** Represents a module variable access block, which represents a lowered view of operations taken on both the local + * variable and member-level access. 
+    *
+    * @see
+    *   <a href="https://github.com/joernio/joern/pull/3750">[pysrc2cpg] Model Field-like Behaviour of Module
+    *   Variables</a>
+    */
+  class ModuleVariableReference(node: Block) extends Block(node.graph(), node.id)
+
+}
diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/README.md b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/README.md
new file mode 100644
index 000000000000..bef2a56e8265
--- /dev/null
+++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/README.md
@@ -0,0 +1,9 @@
+# Module Variables
+
+This package adds the complexity necessary to reason about and handle the changes introduced by:
+[\[pysrc2cpg\] Model Field-like Behaviour of Module Variables](https://github.com/joernio/joern/pull/3750/)
+
+There is likely a much better schema-level way of addressing this hybrid behaviour of module-level variables, but this
+is the least intrusive approach and does not require a large amount of planning. Thus, we leave the following note:
+
+TODO: Replace with a far-reaching, but simpler solution across frontends.
diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableAsMemberMethods.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableAsMemberMethods.scala
new file mode 100644
index 000000000000..abb80e3797f0
--- /dev/null
+++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableAsMemberMethods.scala
@@ -0,0 +1,23 @@
+package io.shiftleft.semanticcpg.language.modulevariable.nodemethods
+
+import io.shiftleft.codepropertygraph.generated.nodes.Member
+import io.shiftleft.semanticcpg.language.*
+import overflowdb.traversal.help.Doc
+
+/** Node-level extension methods for a single [[Member]] that may act as a module variable.
+  */
+class ModuleVariableAsMemberMethods(node: Member) extends AnyVal {
+
+  @Doc(info = "If this member refers to a module variable")
+  def isModuleVariable: Boolean = {
+    Iterator(node)
+      .where(
+        // True only if `ref.parentBlock.inAssignment` leads to a target identifier that has this member's name and
+        // whose enclosing method is named `<module>`.
+        _.ref.parentBlock.inAssignment.target.isIdentifier
+          .where(_.and(_.nameExact(node.name), _.method.nameExact("<module>")))
+      )
+      .nonEmpty
+  }
+
+}
diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableReferenceMethods.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableReferenceMethods.scala
new file mode 100644
index 000000000000..4f443bb61b53
--- /dev/null
+++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/modulevariable/nodemethods/ModuleVariableReferenceMethods.scala
@@ -0,0 +1,13 @@
+package io.shiftleft.semanticcpg.language.modulevariable.nodemethods
+
+import io.shiftleft.semanticcpg.language.modulevariable.OpNodes
+import io.shiftleft.semanticcpg.language.*
+import overflowdb.traversal.help.Doc
+
+class ModuleVariableReferenceMethods(node: OpNodes.ModuleVariableReference) extends AnyVal {
+
+  @Doc(info = "The module variable being referenced")
+  def moduleVariable: Iterator[OpNodes.ModuleVariable] =
node.astChildren.headOption.isCall.fieldAccess.referencedMember.map(new OpNodes.ModuleVariable(_)) + +} diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/package.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/package.scala index 9f462586bdc9..22498eaa9d8e 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/package.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/package.scala @@ -1,24 +1,24 @@ package io.shiftleft.semanticcpg import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.nodes._ +import io.shiftleft.codepropertygraph.generated.nodes.* import io.shiftleft.codepropertygraph.generated.traversal.NodeTraversalImplicits import io.shiftleft.semanticcpg.language.bindingextension.{ - MethodTraversal => BindingMethodTraversal, - TypeDeclTraversal => BindingTypeDeclTraversal + MethodTraversal as BindingMethodTraversal, + TypeDeclTraversal as BindingTypeDeclTraversal } import io.shiftleft.semanticcpg.language.callgraphextension.{CallTraversal, MethodTraversal} import io.shiftleft.semanticcpg.language.dotextension.{AstNodeDot, CfgNodeDot, InterproceduralNodeDot} -import io.shiftleft.semanticcpg.language.nodemethods._ +import io.shiftleft.semanticcpg.language.nodemethods.* import io.shiftleft.semanticcpg.language.types.expressions.generalizations.{ AstNodeTraversal, CfgNodeTraversal, DeclarationTraversal, ExpressionTraversal } -import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal => OriginalCall, _} -import io.shiftleft.semanticcpg.language.types.propertyaccessors._ -import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal => OriginalMethod, _} +import io.shiftleft.semanticcpg.language.types.expressions.{CallTraversal as OriginalCall, *} +import io.shiftleft.semanticcpg.language.types.propertyaccessors.* +import io.shiftleft.semanticcpg.language.types.structure.{MethodTraversal as OriginalMethod, *} import 
overflowdb.NodeOrDetachedNode /** Language for traversing the code property graph @@ -26,7 +26,11 @@ import overflowdb.NodeOrDetachedNode * Implicit conversions to specific steps, based on the node at hand. Automatically in scope when using anything in the * `steps` package, e.g. `Steps` */ -package object language extends operatorextension.Implicits with LowPrioImplicits with NodeTraversalImplicits { +package object language + extends operatorextension.Implicits + with modulevariable.Implicits + with LowPrioImplicits + with NodeTraversalImplicits { // Implicit conversions from generated node types. We use these to add methods // to generated node types.