From ff5e780ca3efe2e6909c8cfd5eea00243c98900f Mon Sep 17 00:00:00 2001 From: KhemrajSingh Rathore <khemraj.rathore@privado.ai> Date: Mon, 13 Nov 2023 22:31:01 +0530 Subject: [PATCH] [kotlin2cpg] - Apply `X2CpgConfig` File Filtering in Kotlin (#3822) There was a bug in the Kotlin frontend where the `X2CpgConfig` file filtering was not fully applied when removing excluded files: the source entries were only checked against the `ignoredFiles` paths and not against `ignoredFilesRegex`. This PR fixes that by filtering the Kotlin source entries through the shared `SourceFiles.filterFile`. This PR is an extension of https://github.com/joernio/joern/pull/3813 --- .../joern/c2cpg/passes/AstCreationPass.scala | 6 +-- .../joern/c2cpg/passes/PreprocessorPass.scala | 6 +-- .../joern/gosrc2cpg/utils/AstGenRunner.scala | 4 +- .../jpastprinter/JavaParserAstPrinter.scala | 6 +-- .../javasrc2cpg/passes/AstCreationPass.scala | 6 +-- .../io/joern/kotlin2cpg/Kotlin2Cpg.scala | 24 ++++++------ .../php2cpg/passes/AstCreationPass.scala | 4 +- .../joern/pysrc2cpg/Py2CpgOnFileSystem.scala | 4 +- .../deprecated/passes/AstCreationPass.scala | 4 +- .../rubysrc2cpg/passes/AstCreationPass.scala | 4 +- .../scala/io/joern/x2cpg/SourceFiles.scala | 37 ++++++++++--------- 11 files changed, 54 insertions(+), 51 deletions(-) diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala index e949a6239433..9d45763d7e9a 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala @@ -24,9 +24,9 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report()) .determine( config.inputPath, FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS, - ignoredDefaultRegex = Some(DefaultIgnoredFolders), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredDefaultRegex = Option(DefaultIgnoredFolders), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath
= Option(config.ignoredFiles) ) .toArray diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala index a396729c57ff..3a884d7a9257 100644 --- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala +++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala @@ -23,9 +23,9 @@ class PreprocessorPass(config: Config) { .determine( config.inputPath, FileDefaults.SOURCE_FILE_EXTENSIONS, - ignoredDefaultRegex = Some(DefaultIgnoredFolders), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredDefaultRegex = Option(DefaultIgnoredFolders), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .par .flatMap(runOnPart) diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala index f76bc4d3d54a..d73e328742c9 100644 --- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala +++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala @@ -147,8 +147,8 @@ class AstGenRunner(config: Config) { val srcFiles = SourceFiles.determine( out.toString(), Set(".json"), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) val parsedModFile = filterModFile(srcFiles, out) val parsed = filterFiles(srcFiles, out) diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala 
index 6a1444eb798f..91a0e62a9d90 100644 --- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala +++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala @@ -19,9 +19,9 @@ object JavaParserAstPrinter { .determine( config.inputPath, JavaSrc2Cpg.sourceFileExtensions, - ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredDefaultRegex = Option(JavaSrc2Cpg.DefaultIgnoredFilesRegex), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .foreach { filename => val relativeFilename = Path.of(config.inputPath).relativize(Path.of(filename)).toString diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala index 15b1d0ec4225..d354fd32300b 100644 --- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala @@ -45,9 +45,9 @@ class AstCreationPass(config: Config, cpg: Cpg, sourcesOverride: Option[List[Str SourceFiles.determine( config.inputPath, JavaSrc2Cpg.sourceFileExtensions, - ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredDefaultRegex = Option(JavaSrc2Cpg.DefaultIgnoredFilesRegex), + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) ) .toArray diff --git a/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala 
b/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala index 391b5900f9f1..3843083cdeb0 100644 --- a/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala +++ b/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala @@ -25,6 +25,7 @@ import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecovery import io.joern.x2cpg.utils.dependency.{DependencyResolver, DependencyResolverParams, GradleConfigKeys} import io.joern.kotlin2cpg.interop.JavasrcInterop import io.joern.kotlin2cpg.jar4import.UsesService +import io.joern.x2cpg.SourceFiles.filterFile import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.Languages import io.shiftleft.semanticcpg.language.* @@ -77,8 +78,8 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService { val filesWithKtExtension = SourceFiles.determine( sourceDir, Set(".kt"), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) if (filesWithKtExtension.isEmpty) { println(s"The provided input directory does not contain files ending in '.kt' `$sourceDir`. 
Exiting.") @@ -89,8 +90,8 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService { val filesWithJavaExtension = SourceFiles.determine( sourceDir, Set(".java"), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) if (filesWithJavaExtension.nonEmpty) { logger.info(s"Found ${filesWithJavaExtension.size} files with the `.java` extension.") @@ -142,13 +143,14 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService { ) val sourceEntries = entriesForSources(environment.getSourceFiles.asScala, sourceDir) - val sources = sourceEntries.filterNot { entry => - config.ignoredFiles.exists { pathToIgnore => - val parent = Paths.get(pathToIgnore).toAbsolutePath - val child = Paths.get(entry.filename) - child.startsWith(parent) - } - } + val sources = sourceEntries.filter(entry => + SourceFiles.filterFile( + entry.filename, + config.inputPath, + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) + ) + ) val configFiles = entriesForConfigFiles(SourceFilesPicker.configFiles(sourceDir), sourceDir) val typeInfoProvider = new DefaultTypeInfoProvider(environment) diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala index 82c75d573d52..b27d4fddab81 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala @@ -23,8 +23,8 @@ class AstCreationPass(config: Config, cpg: Cpg, parser: PhpParser)(implicit with .determine( config.inputPath, PhpSourceFileExtensions, - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = 
Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .toArray diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala index f379bcfa082b..e251c38c3657 100644 --- a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala +++ b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala @@ -66,8 +66,8 @@ class Py2CpgOnFileSystem extends X2CpgFrontend[Py2CpgOnFileSystemConfig] { .determine( config.inputPath, Set(".py"), - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .map(x => Path.of(x)) .filter { file => filterIgnoreDirNames(file, inputPath, ignoreDirNamesSet) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala index 4797ff54137f..c32ac407691e 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala @@ -32,8 +32,8 @@ class AstCreationPass( .determine( config.inputPath, RubySourceFileExtensions, - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .toArray diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala index 
b5f5076aa3b6..0947528fb856 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala @@ -20,8 +20,8 @@ class AstCreationPass(cpg: Cpg, parser: ResourceManagedParser, config: Config) .determine( config.inputPath, RubySourceFileExtensions, - ignoredFilesRegex = Some(config.ignoredFilesRegex), - ignoredFilesPath = Some(config.ignoredFiles) + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) ) .toArray } diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala index 935bc0fdfa42..233fcb9d762c 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala @@ -46,30 +46,31 @@ object SourceFiles { } } + /** Decides whether a single file passes all of the provided ignore filters. + * @param file path of the file to check + * @param inputPath input path forwarded to the regex-based ignore checks + * @param ignoredDefaultRegex optional default regexes; the file is rejected if `isIgnoredByDefaultRegex` matches + * @param ignoredFilesRegex optional regex; the file is rejected if `isIgnoredByRegex` matches + * @param ignoredFilesPath optional list of paths; the file is rejected if `isIgnoredByFileList` matches + * @return `true` if the file is not ignored by any of the provided filters, `false` otherwise + */ + def filterFile( + file: String, + inputPath: String, + ignoredDefaultRegex: Option[Seq[Regex]] = None, + ignoredFilesRegex: Option[Regex] = None, + ignoredFilesPath: Option[Seq[String]] = None + ): Boolean = !ignoredDefaultRegex.exists(isIgnoredByDefaultRegex(file, inputPath, _)) + && !ignoredFilesRegex.exists(isIgnoredByRegex(file, inputPath, _)) + && !ignoredFilesPath.exists(isIgnoredByFileList(file, _)) + private def filterFiles( files: List[String], inputPath: String, ignoredDefaultRegex: Option[Seq[Regex]] = None, ignoredFilesRegex: Option[Regex] = None, ignoredFilesPath: Option[Seq[String]] = None - ): List[String] = files.filter { - case filePath - if ignoredDefaultRegex.isDefined && ignoredDefaultRegex.get.nonEmpty && isIgnoredByDefaultRegex( - filePath, - 
inputPath, - ignoredDefaultRegex.get - ) => - false - case filePath if ignoredFilesRegex.isDefined && isIgnoredByRegex(filePath, inputPath, ignoredFilesRegex.get) => - false - case filePath - if ignoredFilesPath.isDefined && ignoredFilesPath.get.nonEmpty && isIgnoredByFileList( - filePath, - ignoredFilesPath.get - ) => - false - case _ => true - } + ): List[String] = files.filter(filterFile(_, inputPath, ignoredDefaultRegex, ignoredFilesRegex, ignoredFilesPath)) /** For given input paths, determine all source files by inspecting filename extensions and filter the result if * following arguments ignoredDefaultRegex, ignoredFilesRegex and ignoredFilesPath are used