Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruby] Builtin type-stubs for Ruby #4501

Merged
merged 18 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
19d3188
[ruby] download of type-stubs and putting in resources dir working
AndreiDreyer Apr 25, 2024
5663527
[ruby] working on getting resources from stage
AndreiDreyer Apr 25, 2024
4cc3d92
[ruby] type-stubs checksums are working as expected from build step. …
AndreiDreyer Apr 25, 2024
929d781
Merge branch 'master' into andrei/ruby/pull-type-stubs
AndreiDreyer Apr 25, 2024
71baac8
[ruby] Reading directly from zip working
AndreiDreyer Apr 25, 2024
1ce29a8
[ruby] Reading of builtin type-stubs working
AndreiDreyer Apr 26, 2024
a273d89
[ruby] Renamed functions for builtin types. Added builtin types to in…
AndreiDreyer Apr 26, 2024
386e242
[ruby] working on type resolving via the namespace type map
AndreiDreyer Apr 29, 2024
5e82aed
Merge branch 'master' into andrei/ruby/pull-type-stubs
AndreiDreyer Apr 29, 2024
e84a846
[ruby] finished tests on typesmap for builtin_types
AndreiDreyer Apr 29, 2024
ca5ea6e
Merge branch 'master' into andrei/ruby/pull-type-stubs
AndreiDreyer Apr 29, 2024
496cd15
[ruby] Adding some debugging
AndreiDreyer Apr 30, 2024
246666a
[workflows] disabling cache for testing
AndreiDreyer Apr 30, 2024
1f36092
[workflows] re-enabled cache
AndreiDreyer Apr 30, 2024
d03e664
[ruby] added loggers for debugging
AndreiDreyer Apr 30, 2024
1f8da02
Aligned type stubs closer to AstGen Strategy
DavidBakerEffendi Apr 30, 2024
50d6c75
[ruby] remove debug logs from build
AndreiDreyer Apr 30, 2024
2f68991
Merge branch 'master' into andrei/ruby/pull-type-stubs
AndreiDreyer Apr 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ null
**/dotnetastgen-win-arm.exe
**/dotnetastgen-macos
**/dotnetastgen-macos-arm
**/*_builtin_types.zip
**/*_builtin_types.zip.sha512
slices.json
**/.antlr
/joern-cli/frontends/csharpsrc2cpg/bin
Expand Down Expand Up @@ -74,3 +76,5 @@ flake.lock
.bloop
**/.DS_Store
**/.bsp


Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import java.io.{ByteArrayInputStream, InputStream}
import scala.annotation.targetName
import scala.collection.mutable.ListBuffer
import scala.io.Source
import scala.jdk.CollectionConverters.*
import scala.util.{Failure, Success, Try}
import java.net.JarURLConnection
import scala.util.Using

import scala.jdk.CollectionConverters.*

type NamespaceToTypeMap = Map[String, Set[CSharpType]]

/** A mapping of type stubs of known types within the scope of the analysis.
Expand Down Expand Up @@ -75,6 +76,8 @@ object CSharpProgramSummary {
/*
Doing this because java actually cannot read directories from the classPath.
We're assuming there's no further nesting in the builtin_types directory structure.
TODO: Once MessagePack types and compression are implemented for CSharp, the `resourcePaths` building can
be moved into `ProgramSummary`, since all of its subclasses will need to do this to find builtin types
*/
val resourcePaths: List[String] = Option(getClass.getClassLoader.getResource(builtinDirectory)) match {
case Some(url) if url.getProtocol == "jar" =>
Expand Down
1 change: 1 addition & 0 deletions joern-cli/frontends/rubysrc2cpg/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Created by IntelliJ's ANTLR plugin
gen/
*.tokens
type_stubs
52 changes: 52 additions & 0 deletions joern-cli/frontends/rubysrc2cpg/build.sbt
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
import com.typesafe.config.{Config, ConfigFactory}

name := "rubysrc2cpg"

dependsOn(Projects.dataflowengineoss % "compile->compile;test->test", Projects.x2cpg % "compile->compile;test->test")

// Settings of this frontend, parsed and resolved from `application.conf` in the compile resource directory.
lazy val appProperties = settingKey[Config]("App Properties")
appProperties := {
  val confFile = (Compile / resourceDirectory).value / "application.conf"
  ConfigFactory.parseFile(confFile).resolve()
}

// Version of the joern-type-stubs release to use, read from the
// `rubysrc2cpg.joern_type_stubs_version` key of `appProperties`.
lazy val joernTypeStubsVersion = settingKey[String]("joern_type_stub version")
joernTypeStubsVersion := appProperties.value.getString("rubysrc2cpg.joern_type_stubs_version")

libraryDependencies ++= Seq(
"io.shiftleft" %% "codepropertygraph" % Versions.cpg,
"org.apache.commons" % "commons-compress" % Versions.commonsCompress, // For unpacking Gems with `--download-dependencies`
Expand All @@ -14,3 +26,43 @@ enablePlugins(JavaAppPackaging, LauncherJarPlugin, Antlr4Plugin)
Antlr4 / antlr4Version := Versions.antlr
Antlr4 / antlr4GenVisitor := true
Antlr4 / javaSource := (Compile / sourceManaged).value

// Base download URL of the joern-type-stubs release for `joernTypeStubsVersion`. It ends with a
// trailing slash because file names are appended to it directly.
lazy val joernTypeStubsDlUrl = settingKey[String]("joern_type_stubs download url")
joernTypeStubsDlUrl := s"https://github.com/joernio/joern-type-stubs/releases/download/v${joernTypeStubsVersion.value}/"

// Downloads the Ruby builtin type stubs archive together with its SHA-512 checksum file into
// `<baseDirectory>/type_stubs`, verifies the archive against the checksum, and copies the directory
// into the `Universal` staging directory.
//
// Throws an IllegalStateException if the checksum file does not contain exactly one line or if the
// checksum of the downloaded archive does not match the published one.
lazy val joernTypeStubsDlTask = taskKey[Unit]("Download joern-type-stubs")
joernTypeStubsDlTask := {
  val baseUrl           = joernTypeStubsDlUrl.value
  val joernTypeStubsDir = baseDirectory.value / "type_stubs"
  val distDir           = (Universal / stagingDirectory).value / "type_stubs"
  val fileName          = "rubysrc_builtin_types.zip"
  val shaFileName       = s"$fileName.sha512"

  joernTypeStubsDir.mkdirs()

  DownloadHelper.ensureIsAvailable(s"$baseUrl$fileName", joernTypeStubsDir / fileName)
  DownloadHelper.ensureIsAvailable(s"$baseUrl$shaFileName", joernTypeStubsDir / shaFileName)

  val typeStubsFile = better.files.File(joernTypeStubsDir.getAbsolutePath) / fileName
  val checksumFile  = better.files.File(joernTypeStubsDir.getAbsolutePath) / shaFileName

  // Read the checksum file exactly once (and fully), so that no file handle is left open.
  // The checksum file must contain exactly 1 line, if more or less we automatically fail.
  val expectedSha = checksumFile.lines.toList match {
    case line :: Nil =>
      // Checksum from terminal adds the filename to the line, so we split on whitespace to get the
      // checksum separate from the filename. Trimming guards against leading whitespace.
      line.trim.split("\\s+").head
    case _ =>
      throw new IllegalStateException("Checksum File should only have 1 line")
  }

  // Hex digests are compared case-insensitively, as tools differ in the casing they emit.
  if (!expectedSha.equalsIgnoreCase(typeStubsFile.sha512)) {
    throw new IllegalStateException("Checksums do not match for type stubs!")
  }

  distDir.mkdirs()
  IO.copyDirectory(joernTypeStubsDir, distDir)
}

Compile / compile := ((Compile / compile) dependsOn joernTypeStubsDlTask).value

Universal / packageName := name.value
Universal / topLevelDirectory := None
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
rubysrc2cpg {
joern_type_stubs_version: "0.6.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@ package io.joern.rubysrc2cpg

import io.joern.rubysrc2cpg.Frontend.*
import io.joern.x2cpg.passes.frontend.{TypeRecoveryParserConfig, XTypeRecovery}
import io.joern.x2cpg.typestub.TypeStubConfig
import io.joern.x2cpg.{DependencyDownloadConfig, X2CpgConfig, X2CpgMain}
import scopt.OParser

final case class Config(
antlrCacheMemLimit: Double = 0.6d,
useDeprecatedFrontend: Boolean = false,
downloadDependencies: Boolean = false
downloadDependencies: Boolean = false,
useTypeStubs: Boolean = true
) extends X2CpgConfig[Config]
with DependencyDownloadConfig[Config]
with TypeRecoveryParserConfig[Config] {
with TypeRecoveryParserConfig[Config]
with TypeStubConfig[Config] {

this.defaultIgnoredFilesRegex = List("spec", "test").flatMap { directory =>
List(s"(^|\\\\)$directory($$|\\\\)".r.unanchored, s"(^|/)$directory($$|/)".r.unanchored)
Expand All @@ -28,6 +31,10 @@ final case class Config(
/** Returns a copy of this config with `downloadDependencies` set to `value`, after passing this config to
  * `withInheritedFields` on the copy.
  */
override def withDownloadDependencies(value: Boolean): Config =
  copy(downloadDependencies = value).withInheritedFields(this)

/** Returns a copy of this config with `useTypeStubs` set to `value`, after passing this config to
  * `withInheritedFields` on the copy.
  */
override def withTypeStubs(value: Boolean): Config =
  copy(useTypeStubs = value).withInheritedFields(this)
}

private object Frontend {
Expand Down Expand Up @@ -55,7 +62,8 @@ private object Frontend {
.action((_, c) => c.withUseDeprecatedFrontend(true))
.text("uses the original (but deprecated) Ruby frontend (default false)"),
DependencyDownloadConfig.parserOptions,
XTypeRecovery.parserOptions
XTypeRecovery.parserOptions,
TypeStubConfig.parserOptions
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.passes.base.AstLinkerPass
import io.joern.x2cpg.passes.callgraph.NaiveCallLinker
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.typestub.TypeStubMetaData
import io.joern.x2cpg.utils.{ConcurrentTaskUtil, ExternalCommand}
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.Cpg
Expand Down Expand Up @@ -62,8 +63,7 @@ class RubySrc2Cpg extends X2CpgFrontend[Config] {
case Failure(exception) => logger.warn(s"Unable to pre-parse Ruby file, skipping - ", exception); None
case Success(summary) => Option(summary)
}
.reduceOption((a, b) => a ++ b)
.getOrElse(RubyProgramSummary())
.foldLeft(RubyProgramSummary(RubyProgramSummary.BuiltinTypes(config.typeStubMetaData)))(_ ++ _)

val programSummary = if (config.downloadDependencies) {
DependencyDownloader(cpg, internalProgramSummary).download()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
package io.joern.rubysrc2cpg.datastructures

import better.files.File
import io.joern.x2cpg.Defines as XDefines
import io.joern.x2cpg.datastructures.{FieldLike, MethodLike, ProgramSummary, TypeLike}
import io.joern.x2cpg.typestub.{TypeStubMetaData, TypeStubUtil}
import org.slf4j.LoggerFactory

import java.io.{ByteArrayInputStream, InputStream}
import scala.annotation.targetName
import scala.io.Source
import java.net.JarURLConnection
import java.util.zip.ZipInputStream
import scala.util.{Failure, Success, Try, Using}
import scala.jdk.CollectionConverters.*
import upickle.default.*

import scala.collection.mutable.ListBuffer

type NamespaceToTypeMap = Map[String, Set[RubyType]]

class RubyProgramSummary(
initialNamespaceMap: Map[String, Set[RubyType]] = Map.empty,
initialPathMap: Map[String, Set[RubyType]] = Map.empty
initialNamespaceMap: NamespaceToTypeMap = Map.empty,
initialPathMap: NamespaceToTypeMap = Map.empty
) extends ProgramSummary[RubyType] {

override val namespaceToType: Map[String, Set[RubyType]] = initialNamespaceMap
Expand All @@ -21,7 +34,69 @@ class RubyProgramSummary(
ProgramSummary.combine(this.pathToType, other.pathToType)
)
}
}

object RubyProgramSummary {
  private val logger = LoggerFactory.getLogger(getClass)

  /** Loads the builtin Ruby type stubs as a namespace-to-types mapping.
    *
    * @param typeStubMetaData
    *   determines whether type stubs are used at all and is passed on to locate the type stub directory.
    * @return
    *   the merged mapping of all builtin type stub archives, or an empty map if type stubs are disabled or the
    *   archives cannot be read or decoded (in which case a warning is logged).
    */
  def BuiltinTypes(implicit typeStubMetaData: TypeStubMetaData): NamespaceToTypeMap = {
    if (typeStubMetaData.useTypeStubs) {
      // Merging the archives is wrapped in `Try` as well, so that I/O or decoding failures while reading the
      // archives are logged here instead of escaping to the caller.
      Try(mergeBuiltinMpkZip).flatMap(mpkZipToInitialMapping) match {
        case Failure(exception) => logger.warn("Unable to parse builtin types", exception); Map.empty
        case Success(mapping)   => mapping
      }
    } else {
      Map.empty
    }
  }

  /** Decodes the binary (upickle `readBinary`) content of the given stream into a namespace-to-types mapping.
    */
  private def mpkZipToInitialMapping(inputStream: InputStream): Try[NamespaceToTypeMap] = {
    Try(readBinary[NamespaceToTypeMap](inputStream.readAllBytes()))
  }

  /** Reads every entry of every `rubysrc*.zip` archive under the type stub directory, merges the decoded mappings
    * (types of the same namespace are unioned), and returns the merged mapping re-encoded as a binary stream.
    *
    * If no archive is found, a warning is logged and the stream encodes an empty mapping.
    */
  private def mergeBuiltinMpkZip(implicit typeStubMetaData: TypeStubMetaData): InputStream = {
    val typeStubDir = TypeStubUtil.typeStubDir

    val typeStubFiles: Seq[File] =
      typeStubDir
        .walk()
        .filter(f => f.isRegularFile && f.name.startsWith("rubysrc") && f.`extension`.contains(".zip"))
        .toSeq

    if (typeStubFiles.isEmpty) {
      logger.warn("No ZIP files found.")
    }

    val mergedMpks = collection.mutable.Map.empty[String, Set[RubyType]]
    typeStubFiles.foreach { f =>
      f.fileInputStream { fis =>
        Using.resource(new ZipInputStream(fis)) { zis =>
          Iterator
            .continually(zis.getNextEntry)
            .takeWhile(_ != null)
            // Directory entries carry no payload and cannot be decoded.
            .filterNot(_.isDirectory)
            .foreach { _ =>
              // `readAllBytes` on a ZipInputStream stops at the end of the current entry.
              val mpkObj =
                upickle.default.readBinary[collection.mutable.Map[String, Set[RubyType]]](zis.readAllBytes())
              mpkObj.foreach { case (namespace, types) =>
                mergedMpks.updateWith(namespace)(existing => Some(existing.fold(types)(_ ++ types)))
              }
            }
        }
      }
    }

    new ByteArrayInputStream(upickle.default.writeBinary(mergedMpks))
  }
}

case class RubyMethod(
Expand Down Expand Up @@ -74,13 +149,14 @@ object RubyType {
name = json("name").str,
methods = json.obj.get("methods") match {
case Some(jsonMethods) =>
val methodsMap = read[collection.mutable.Map[String, RubyMethod]](jsonMethods)
methodsMap.map { case (name, func) =>
val splitName = name.split("\\.")
val methodsList = read[List[RubyMethod]](jsonMethods)

methodsList.map { func =>
val splitName = func.name.split("\\.")
val baseTypeFullName = splitName.dropRight(1).mkString(".")

func.copy(name = name, baseTypeFullName = Option(baseTypeFullName))
}.toList
func.copy(name = func.name, baseTypeFullName = Option(baseTypeFullName))
}
case None => Nil
},
fields = json.obj.get("fields").map(read[List[RubyField]](_)).getOrElse(Nil)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,23 @@ class RubyScope(summary: RubyProgramSummary, projectRoot: Option[String])
Option(path)
}

relativizedPath.iterator.flatMap(summary.pathToType.getOrElse(_, Set())).foreach { ty =>
addImportedTypeOrModule(ty.name)
relativizedPath.iterator.flatMap(summary.pathToType.getOrElse(_, Set())) match {
case x if x.nonEmpty =>
x.foreach { ty => addImportedTypeOrModule(ty.name) }
case _ =>
addRequireGem(path)
}
}

/** Registers an included type or module by delegating to `addImportedMember`.
  *
  * @param typeOrModule
  *   the name of the included type or module.
  */
def addInclude(typeOrModule: String): Unit = {
addImportedMember(typeOrModule)
}

/** Adds to `typesInScope` every type of the program summary's namespace map whose name starts with the given gem
  * name.
  *
  * @param gemName
  *   the name of the required gem, used as a prefix filter on type names.
  */
def addRequireGem(gemName: String): Unit = {
  val gemTypes = for {
    typesOfNamespace <- summary.namespaceToType.values
    ty               <- typesOfNamespace
    if ty.name.startsWith(gemName)
  } yield ty
  typesInScope.addAll(gemTypes)
}

/** @return
* the full name of the surrounding scope.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,23 @@ class ImportTests extends RubyCode2CpgFixture with Inspectors {

}

"Builtin Types type-map" should {
  // Fixture that requires `csv` and `pp` and calls into both.
  val cpg = code("""
      |require 'csv'
      |require 'pp'
      |CSV.parse("")
      |CSV::Table.new()
      |PP.pp(obj)
      |""".stripMargin)

  "resolve calls to builtin functions" in {
    // Only the calls whose method full name starts with `pp` or `csv` are of interest here.
    val builtinCalls = cpg.call.methodFullName("(pp|csv).*").l

    inside(builtinCalls) {
      case parseCall :: tableInitCall :: prettyPrintCall :: Nil =>
        parseCall.methodFullName shouldBe "csv.CSV:parse"
        prettyPrintCall.methodFullName shouldBe "pp.PP:pp"
        tableInitCall.methodFullName shouldBe "csv.CSV.Table:<init>"
      case unexpected => fail(s"Expected three calls, got [${unexpected.code.mkString(",")}] instead")
    }
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package io.joern.x2cpg.typestub

import io.joern.x2cpg.X2CpgConfig
import scopt.OParser

import java.net.URL

/** Config mixin for frontends that can use type stubs to help resolve type full names.
  */
trait TypeStubConfig[R <: X2CpgConfig[R]] { this: R =>

  /** Whether type stubs are used to help resolve type information. Using type stubs may increase memory
    * consumption.
    */
  def useTypeStubs: Boolean

  /** Returns a config in which the use of type stubs is switched on or off.
    */
  def withTypeStubs(value: Boolean): R

  /** Bundles the type stub flag with the code source location of this config's class.
    */
  def typeStubMetaData: TypeStubMetaData = {
    val enabled      = useTypeStubs
    val codeLocation = getClass.getProtectionDomain.getCodeSource.getLocation
    TypeStubMetaData(useTypeStubs = enabled, packagePath = codeLocation)
  }

}

/** The meta data around managing type stub resources for this frontend.
  * @param useTypeStubs
  *   a flag to indicate whether type stubs should be used.
  * @param packagePath
  *   the code path for the frontend, i.e. the location URL of the code source the frontend was loaded from.
  */
case class TypeStubMetaData(useTypeStubs: Boolean, packagePath: URL)

object TypeStubConfig {

  /** Command line options for configs that mix in [[TypeStubConfig]].
    *
    * Provides the `--disable-type-stubs` flag, which turns the use of type stubs off via `withTypeStubs(false)`.
    */
  def parserOptions[R <: X2CpgConfig[R] & TypeStubConfig[R]]: OParser[?, R] = {
    val builder = OParser.builder[R]
    import builder.*
    OParser.sequence(
      opt[Unit]("disable-type-stubs")
        .text(
          "Disables the use of type stubs for type information recovery. Using type stubs may increase memory consumption."
        )
        // The flag carries no value, hence the first argument is ignored.
        .action((_, c) => c.withTypeStubs(false))
    )
  }

}
Loading
Loading