From ff5e780ca3efe2e6909c8cfd5eea00243c98900f Mon Sep 17 00:00:00 2001
From: KhemrajSingh Rathore <khemraj.rathore@privado.ai>
Date: Mon, 13 Nov 2023 22:31:01 +0530
Subject: [PATCH] [kotlin2cpg] - Apply `X2CpgConfig` File Filtering in Kotlin
 (#3822)

There was a bug in the Kotlin frontend: file filtering was not being applied, so excluded files were not removed from the analysis. This PR fixes that.

This PR is an extension of https://github.com/joernio/joern/pull/3813
---
 .../joern/c2cpg/passes/AstCreationPass.scala  |  6 +--
 .../joern/c2cpg/passes/PreprocessorPass.scala |  6 +--
 .../joern/gosrc2cpg/utils/AstGenRunner.scala  |  4 +-
 .../jpastprinter/JavaParserAstPrinter.scala   |  6 +--
 .../javasrc2cpg/passes/AstCreationPass.scala  |  6 +--
 .../io/joern/kotlin2cpg/Kotlin2Cpg.scala      | 24 ++++++------
 .../php2cpg/passes/AstCreationPass.scala      |  4 +-
 .../joern/pysrc2cpg/Py2CpgOnFileSystem.scala  |  4 +-
 .../deprecated/passes/AstCreationPass.scala   |  4 +-
 .../rubysrc2cpg/passes/AstCreationPass.scala  |  4 +-
 .../scala/io/joern/x2cpg/SourceFiles.scala    | 37 ++++++++++---------
 11 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala
index e949a6239433..9d45763d7e9a 100644
--- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/AstCreationPass.scala
@@ -24,9 +24,9 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
     .determine(
       config.inputPath,
       FileDefaults.SOURCE_FILE_EXTENSIONS ++ FileDefaults.HEADER_FILE_EXTENSIONS,
-      ignoredDefaultRegex = Some(DefaultIgnoredFolders),
-      ignoredFilesRegex = Some(config.ignoredFilesRegex),
-      ignoredFilesPath = Some(config.ignoredFiles)
+      ignoredDefaultRegex = Option(DefaultIgnoredFolders),
+      ignoredFilesRegex = Option(config.ignoredFilesRegex),
+      ignoredFilesPath = Option(config.ignoredFiles)
     )
     .toArray
 
diff --git a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala
index a396729c57ff..3a884d7a9257 100644
--- a/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala
+++ b/joern-cli/frontends/c2cpg/src/main/scala/io/joern/c2cpg/passes/PreprocessorPass.scala
@@ -23,9 +23,9 @@ class PreprocessorPass(config: Config) {
       .determine(
         config.inputPath,
         FileDefaults.SOURCE_FILE_EXTENSIONS,
-        ignoredDefaultRegex = Some(DefaultIgnoredFolders),
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredDefaultRegex = Option(DefaultIgnoredFolders),
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       .par
       .flatMap(runOnPart)
diff --git a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala
index f76bc4d3d54a..d73e328742c9 100644
--- a/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala
+++ b/joern-cli/frontends/gosrc2cpg/src/main/scala/io/joern/gosrc2cpg/utils/AstGenRunner.scala
@@ -147,8 +147,8 @@ class AstGenRunner(config: Config) {
         val srcFiles = SourceFiles.determine(
           out.toString(),
           Set(".json"),
-          ignoredFilesRegex = Some(config.ignoredFilesRegex),
-          ignoredFilesPath = Some(config.ignoredFiles)
+          ignoredFilesRegex = Option(config.ignoredFilesRegex),
+          ignoredFilesPath = Option(config.ignoredFiles)
         )
         val parsedModFile = filterModFile(srcFiles, out)
         val parsed        = filterFiles(srcFiles, out)
diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala
index 6a1444eb798f..91a0e62a9d90 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/jpastprinter/JavaParserAstPrinter.scala
@@ -19,9 +19,9 @@ object JavaParserAstPrinter {
       .determine(
         config.inputPath,
         JavaSrc2Cpg.sourceFileExtensions,
-        ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredDefaultRegex = Option(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       .foreach { filename =>
         val relativeFilename = Path.of(config.inputPath).relativize(Path.of(filename)).toString
diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
index 15b1d0ec4225..d354fd32300b 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
@@ -45,9 +45,9 @@ class AstCreationPass(config: Config, cpg: Cpg, sourcesOverride: Option[List[Str
       SourceFiles.determine(
         config.inputPath,
         JavaSrc2Cpg.sourceFileExtensions,
-        ignoredDefaultRegex = Some(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredDefaultRegex = Option(JavaSrc2Cpg.DefaultIgnoredFilesRegex),
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
     )
     .toArray
diff --git a/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala b/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala
index 391b5900f9f1..3843083cdeb0 100644
--- a/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala
+++ b/joern-cli/frontends/kotlin2cpg/src/main/scala/io/joern/kotlin2cpg/Kotlin2Cpg.scala
@@ -25,6 +25,7 @@ import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecovery
 import io.joern.x2cpg.utils.dependency.{DependencyResolver, DependencyResolverParams, GradleConfigKeys}
 import io.joern.kotlin2cpg.interop.JavasrcInterop
 import io.joern.kotlin2cpg.jar4import.UsesService
+import io.joern.x2cpg.SourceFiles.filterFile
 import io.shiftleft.codepropertygraph.Cpg
 import io.shiftleft.codepropertygraph.generated.Languages
 import io.shiftleft.semanticcpg.language.*
@@ -77,8 +78,8 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService {
       val filesWithKtExtension = SourceFiles.determine(
         sourceDir,
         Set(".kt"),
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       if (filesWithKtExtension.isEmpty) {
         println(s"The provided input directory does not contain files ending in '.kt' `$sourceDir`. Exiting.")
@@ -89,8 +90,8 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService {
       val filesWithJavaExtension = SourceFiles.determine(
         sourceDir,
         Set(".java"),
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       if (filesWithJavaExtension.nonEmpty) {
         logger.info(s"Found ${filesWithJavaExtension.size} files with the `.java` extension.")
@@ -142,13 +143,14 @@ class Kotlin2Cpg extends X2CpgFrontend[Config] with UsesService {
         )
 
       val sourceEntries = entriesForSources(environment.getSourceFiles.asScala, sourceDir)
-      val sources = sourceEntries.filterNot { entry =>
-        config.ignoredFiles.exists { pathToIgnore =>
-          val parent = Paths.get(pathToIgnore).toAbsolutePath
-          val child  = Paths.get(entry.filename)
-          child.startsWith(parent)
-        }
-      }
+      val sources = sourceEntries.filter(entry =>
+        SourceFiles.filterFile(
+          entry.filename,
+          config.inputPath,
+          ignoredFilesRegex = Option(config.ignoredFilesRegex),
+          ignoredFilesPath = Option(config.ignoredFiles)
+        )
+      )
       val configFiles      = entriesForConfigFiles(SourceFilesPicker.configFiles(sourceDir), sourceDir)
       val typeInfoProvider = new DefaultTypeInfoProvider(environment)
 
diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala
index 82c75d573d52..b27d4fddab81 100644
--- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala
@@ -23,8 +23,8 @@ class AstCreationPass(config: Config, cpg: Cpg, parser: PhpParser)(implicit with
     .determine(
       config.inputPath,
       PhpSourceFileExtensions,
-      ignoredFilesRegex = Some(config.ignoredFilesRegex),
-      ignoredFilesPath = Some(config.ignoredFiles)
+      ignoredFilesRegex = Option(config.ignoredFilesRegex),
+      ignoredFilesPath = Option(config.ignoredFiles)
     )
     .toArray
 
diff --git a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala
index f379bcfa082b..e251c38c3657 100644
--- a/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala
+++ b/joern-cli/frontends/pysrc2cpg/src/main/scala/io/joern/pysrc2cpg/Py2CpgOnFileSystem.scala
@@ -66,8 +66,8 @@ class Py2CpgOnFileSystem extends X2CpgFrontend[Py2CpgOnFileSystemConfig] {
         .determine(
           config.inputPath,
           Set(".py"),
-          ignoredFilesRegex = Some(config.ignoredFilesRegex),
-          ignoredFilesPath = Some(config.ignoredFiles)
+          ignoredFilesRegex = Option(config.ignoredFilesRegex),
+          ignoredFilesPath = Option(config.ignoredFiles)
         )
         .map(x => Path.of(x))
         .filter { file => filterIgnoreDirNames(file, inputPath, ignoreDirNamesSet) }
diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala
index 4797ff54137f..c32ac407691e 100644
--- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/deprecated/passes/AstCreationPass.scala
@@ -32,8 +32,8 @@ class AstCreationPass(
       .determine(
         config.inputPath,
         RubySourceFileExtensions,
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       .toArray
 
diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala
index b5f5076aa3b6..0947528fb856 100644
--- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/AstCreationPass.scala
@@ -20,8 +20,8 @@ class AstCreationPass(cpg: Cpg, parser: ResourceManagedParser, config: Config)
       .determine(
         config.inputPath,
         RubySourceFileExtensions,
-        ignoredFilesRegex = Some(config.ignoredFilesRegex),
-        ignoredFilesPath = Some(config.ignoredFiles)
+        ignoredFilesRegex = Option(config.ignoredFilesRegex),
+        ignoredFilesPath = Option(config.ignoredFiles)
       )
       .toArray
   }
diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala
index 935bc0fdfa42..233fcb9d762c 100644
--- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala
+++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/SourceFiles.scala
@@ -46,30 +46,31 @@ object SourceFiles {
     }
   }
 
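+  /** Decides whether `file` is kept, i.e. none of the supplied ignore filters reports it as ignored.
+    * @param file candidate file name, handed to every ignore check
+    * @param inputPath input path handed to the two regex-based ignore checks
+    * @param ignoredDefaultRegex regexes for the default-regex ignore check; `None` skips the check
+    * @param ignoredFilesRegex regex for the regex ignore check; `None` skips the check
+    * @param ignoredFilesPath entries for the file-list ignore check (no `inputPath`); `None` skips the check
+    * @return `true` if no supplied filter marks the file as ignored, `false` otherwise
+    */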
+  def filterFile(
+    file: String,
+    inputPath: String,
+    ignoredDefaultRegex: Option[Seq[Regex]] = None,
+    ignoredFilesRegex: Option[Regex] = None,
+    ignoredFilesPath: Option[Seq[String]] = None
+  ): Boolean = !ignoredDefaultRegex.exists(isIgnoredByDefaultRegex(file, inputPath, _))
+    && !ignoredFilesRegex.exists(isIgnoredByRegex(file, inputPath, _))
+    && !ignoredFilesPath.exists(isIgnoredByFileList(file, _))
+
   private def filterFiles(
     files: List[String],
     inputPath: String,
     ignoredDefaultRegex: Option[Seq[Regex]] = None,
     ignoredFilesRegex: Option[Regex] = None,
     ignoredFilesPath: Option[Seq[String]] = None
-  ): List[String] = files.filter {
-    case filePath
-        if ignoredDefaultRegex.isDefined && ignoredDefaultRegex.get.nonEmpty && isIgnoredByDefaultRegex(
-          filePath,
-          inputPath,
-          ignoredDefaultRegex.get
-        ) =>
-      false
-    case filePath if ignoredFilesRegex.isDefined && isIgnoredByRegex(filePath, inputPath, ignoredFilesRegex.get) =>
-      false
-    case filePath
-        if ignoredFilesPath.isDefined && ignoredFilesPath.get.nonEmpty && isIgnoredByFileList(
-          filePath,
-          ignoredFilesPath.get
-        ) =>
-      false
-    case _ => true
-  }
+  ): List[String] = files.filter(filterFile(_, inputPath, ignoredDefaultRegex, ignoredFilesRegex, ignoredFilesPath))
 
   /** For given input paths, determine all source files by inspecting filename extensions and filter the result if
     * following arguments ignoredDefaultRegex, ignoredFilesRegex and ignoredFilesPath are used