From e602413c7cbb143bf60b48390e6fbf7a80a30d38 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Fri, 1 Dec 2023 16:38:38 +0200 Subject: [PATCH] [py] Simplify Pseudo Imports for External Modules (#3872) The new import resolution simplified the world for internal code, then gave external code to the old fall-back functionality. The issue is that the old fall-back functionality still assumed the code may be in the CPG, but this is assuredly not the case. This change fixes the pseudo-path based on this guarantee. --- .../pysrc2cpg/PythonImportResolverPass.scala | 56 ++----------------- .../passes/TypeRecoveryPassTests.scala | 22 ++++++++ 2 files changed, 28 insertions(+), 50 deletions(-) diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala index b3b403707940..9c79e82e0e67 100644 --- a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala +++ b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/PythonImportResolverPass.scala @@ -83,47 +83,14 @@ class PythonImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { resolvedImports.foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) } else { // Here we use heuristics to guess the correct paths, and make the types look friendly for querying - unresolvableImportToUnknownImport(currDir, fileName, importedEntity, importedAs) + createPseudoImports(importedEntity, importedAs) .foreach(x => evaluatedImportToTag(x, importCall, diffGraph)) } } - private def unresolvableImportToUnknownImport( - currDir: File, - currFileName: String, - importedEntity: String, - importedAs: String - ): Set[EvaluatedImport] = { - val (namespace, entityName) = if (importedEntity.contains(".")) { - val splitName = importedEntity.split('.').toSeq - val namespace = importedEntity.stripSuffix(s".${splitName.last}") 
(relativizeNamespace(namespace, currFileName), splitName.last) - } else { - val relCurrDir = currDir.pathAsString.stripPrefix(codeRootDir).stripPrefix(JFile.separator) - - (relCurrDir, importedEntity) - } - - createPseudoImports(namespace, entityName, importedAs) - } - - private def relativizeNamespace(path: String, fileName: String): String = if (path.startsWith(".")) { - // TODO: pysrc2cpg does not link files to the correct namespace nodes - val sep = Matcher.quoteReplacement(JFile.separator) - // The below gives us the full path of the relative "." - val relativeNamespace = - if (fileName.contains(JFile.separator)) - fileName.substring(0, fileName.lastIndexOf(JFile.separator)).replaceAll(sep, ".") - else "" - (if (path.length > 1) relativeNamespace + path.replaceAll(sep, ".") - else relativeNamespace).stripPrefix(".") - } else path - /** For an unresolveable import, create a best-effort path of what could be imported, as well as what kind of entity * may be imported. * - * @param path - * the module path. * @param expEntity * the name of the imported entity. This could be a function, module, or variable/field. * @param alias @@ -131,9 +98,8 @@ class PythonImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { * @return * the possible callee names */ - private def createPseudoImports(path: String, expEntity: String, alias: String): Set[EvaluatedImport] = { + private def createPseudoImports(expEntity: String, alias: String): Set[EvaluatedImport] = { val pathSep = "." 
- val sep = Matcher.quoteReplacement(JFile.separator) val isMaybeConstructor = expEntity.split("\\.").lastOption.exists(s => s.nonEmpty && s.charAt(0).isUpper) def toUnresolvedImport(pseudoPath: String): Set[EvaluatedImport] = { @@ -144,20 +110,10 @@ class PythonImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) { } } - if (path.isBlank) { - if (expEntity.contains(".")) { - // Case 1: Qualified path: import foo.bar - val splitFunc = expEntity.split("\\.") - val name = splitFunc.tail.mkString(".") - toUnresolvedImport(s"${splitFunc(0)}.py:$pathSep$name") - } else { - // Case 2: import of a module: import foo => foo.py - toUnresolvedImport(s"$expEntity.py:") - } - } else { - // Case 3: Import from module using alias, e.g. import bar from foo as faz - toUnresolvedImport(s"${path.replaceAll("\\.", sep)}.py:$pathSep$expEntity") - } + expEntity.split("\\.").reverse.toList match + case name :: Nil => toUnresolvedImport(s"$name.py:") + case name :: xs => toUnresolvedImport(s"${xs.reverse.mkString(JFile.separator)}.py:$pathSep$name") + case Nil => Set.empty } private sealed trait ImportableEntity { diff --git a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala index 7dfd358862a6..6eaa544c189f 100644 --- a/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala +++ b/joern-cli/frontends/pysrc2cpg/src/test/scala/io/joern/pysrc2cpg/passes/TypeRecoveryPassTests.scala @@ -1347,4 +1347,26 @@ class TypeRecoveryPassTests extends PySrc2CpgFixture(withOssDataflow = false) { } } + "unknown imports, regardless of relative path" should { + val cpg = code( + """ + |import boto3 + | + |def get_thing(bucket: str, path: str, access_key: str, secret_key: str): + | client = boto3.client('s3', aws_access_key_id = access_key, aws_secret_access_key = secret_key) + | return 
client.get_object(Bucket=bucket, Key=path) + | + |""".stripMargin, + Seq("utils", "botowrapper.py").mkString(File.separator) + ) + + "be resolved with a simple pseudo-import" in { + cpg.call.nameExact("client").methodFullName.head shouldBe "boto3.py:.client" + } + + "propagate this value to the receiving identifier's call accordingly" in { + cpg.call.nameExact("get_object").methodFullName.head shouldBe "boto3.py:.client..get_object" + } + } + }