Skip to content

Commit

Permalink
Scala semantic slicer (#182)
Browse files: browse the repository at this point in the history
* Scala semantic slicer

Signed-off-by: Prabhu Subramanian <[email protected]>

Tweaks

Signed-off-by: Prabhu Subramanian <[email protected]>

* Auto detect scala 3 version during cross builds

Signed-off-by: Prabhu Subramanian <[email protected]>

* Improved .tasty parsing

Signed-off-by: Prabhu Subramanian <[email protected]>

---------

Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu authored Mar 9, 2025
1 parent cbf0caa commit d34eef5
Show file tree
Hide file tree
Showing 10 changed files with 749 additions and 191 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/nodejstests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,6 @@ jobs:
atom -l python --remove-atom -o $env:GITHUB_WORKSPACE\\repotests\\DjanGoat\\app.atom $env:GITHUB_WORKSPACE\\repotests\\DjanGoat
atom parsedeps --remove-atom -l python -o $env:GITHUB_WORKSPACE\\repotests\\DjanGoat\\app.atom $env:GITHUB_WORKSPACE\\repotests\\DjanGoat
atom -l c -o $env:GITHUB_WORKSPACE\\repotests\\libexpat\\app.atom $env:GITHUB_WORKSPACE\\repotests\\libexpat
- name: test2
shell: cmd
run: |
atom -l js --remove-atom -o %GITHUB_WORKSPACE%\\repotests\\juice-shop\\app.atom %GITHUB_WORKSPACE%\\repotests\\juice-shop
- name: test3
shell: cmd
run: |
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ atom -o app.atom -l java --export-atom --export-dir <export dir> --with-data-dep
- Python
- PHP (Requires PHP >= 7.0. Supports PHP 5.2 to 8.3)
- Ruby (Requires Ruby 3.4.2. Supports Ruby 1.8 - 3.3 syntax)
- Scala (WIP)

## Atom Specification

Expand Down
6 changes: 3 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name := "atom"
ThisBuild / organization := "io.appthreat"
ThisBuild / version := "2.1.13"
ThisBuild / version := "2.1.14"
ThisBuild / scalaVersion := "3.6.2"

val chenVersion = "2.3.3"
val chenVersion = "2.3.4"

lazy val atom = Projects.atom

libraryDependencies ++= Seq(
"com.github.pathikrit" %% "better-files" % "3.9.2",
"com.github.scopt" %% "scopt" % "4.1.0",
"org.slf4j" % "slf4j-nop" % "2.0.16" % Optional,
"org.slf4j" % "slf4j-nop" % "2.0.17" % Optional,
("io.appthreat" %% "c2cpg" % Versions.chen).excludeAll(
ExclusionRule(organization = "com.ibm.icu", name = "icu4j"),
ExclusionRule(organization = "org.jline", name = "jline"),
Expand Down
6 changes: 5 additions & 1 deletion ci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@ LABEL maintainer="appthreat" \
org.opencontainers.docker.cmd="docker run --rm -v /tmp:/tmp -v $HOME:$HOME -v $(pwd):/app:rw -it ghcr.io/appthreat/atom atom -o /app/app.atom -l java /app"

ARG MAVEN_VERSION=3.9.9
ARG SCALA_VERSION=3.6.4
ARG RUBY_VERSION=3.4.2

ENV MAVEN_VERSION=$MAVEN_VERSION \
MAVEN_HOME="/opt/maven/${MAVEN_VERSION}" \
SCALA_VERSION=$SCALA_VERSION \
SCALA_HOME="/opt/scala/${SCALA_VERSION}" \
ANDROID_HOME=/opt/android-sdk-linux \
JAVA_TOOL_OPTIONS="-Dfile.encoding=UTF-8" \
PHP_PARSER_BIN=/opt/vendor/bin/php-parse \
Expand All @@ -30,7 +33,7 @@ ENV MAVEN_VERSION=$MAVEN_VERSION \
RUBY_BUILD_BUILD_PATH="/tmp/rbenv" \
RUBY_BUILD_HTTP_CLIENT=curl

ENV PATH=/opt/bin:/opt/vendor/bin:${PATH}:${MAVEN_HOME}/bin:/usr/local/bin/:/root/.local/bin:${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:/root/.rbenv/bin:/root/.rbenv/versions/3.4.2/bin:
ENV PATH=/opt/bin:/opt/vendor/bin:${PATH}:${MAVEN_HOME}/bin:${SCALA_HOME}/bin:/usr/local/bin/:/root/.local/bin:${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:/root/.rbenv/bin:/root/.rbenv/versions/3.4.2/bin:

WORKDIR /opt

Expand Down Expand Up @@ -76,6 +79,7 @@ RUN set -e; \
&& source "/root/.sdkman/bin/sdkman-init.sh" \
&& echo -e "sdkman_auto_answer=true\nsdkman_selfupdate_feature=false\nsdkman_auto_env=true\nsdkman_curl_connect_timeout=20\nsdkman_curl_max_time=0" >> /root/.sdkman/etc/config \
&& sdk install maven $MAVEN_VERSION \
&& sdk install scala $SCALA_VERSION \
&& sdk offline enable \
&& mv /root/.sdkman/candidates/* /opt/ \
&& rm -rf /root/.sdkman \
Expand Down
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"downloadUrl": "https://github.com/AppThreat/atom",
"issueTracker": "https://github.com/AppThreat/atom/issues",
"name": "atom",
"version": "2.1.13",
"version": "2.1.14",
"description": "Atom is a novel intermediate representation for next-generation code analysis.",
"applicationCategory": "code-analysis",
"keywords": [
Expand Down
211 changes: 127 additions & 84 deletions src/main/scala/io/appthreat/atom/Atom.scala
Original file line number Diff line number Diff line change
Expand Up @@ -334,69 +334,78 @@ object Atom:
))
case _ =>
None

try
migrateAtomConfigToSliceConfig(config) match
case x: AtomConfig if config.exportAtom =>
println(s"Exporting the atom to the directory ${x.exportDir}")
config.exportFormat match
case "graphml" =>
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).gml(x.exportDir)
case _ =>
// Export all representations
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).dot(x.exportDir)
// Export individual representations
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).exportAllRepr(x.exportDir)
case _: DataFlowConfig =>
val dataFlowSlice = sliceCpg(ag).collect { case x: DataFlowSlice => x }
val atomDataFlowSliceJson =
dataFlowSlice.map(x =>
AtomDataFlowSlice(x, DataFlowGraph.buildFromSlice(x).paths).toJson
)
saveSlice(config.outputSliceFile, atomDataFlowSliceJson)
case u: UsagesConfig =>
saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson))
if u.extractEndpoints then
val openapiFileName =
sys.env.getOrElse("ATOM_TOOLS_OPENAPI_FILENAME", "openapi.generated.json")
val openapiFormat = sys.env.getOrElse("ATOM_TOOLS_OPENAPI_FORMAT", "openapi3.1.0")
val atomToolsWorkDir =
sys.env.getOrElse("ATOM_TOOLS_WORK_DIR", config.inputPath.parent.pathAsString)
println(s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f ${openapiFormat} -o ${config
.inputPath.pathAsString}${java.io.File.separator}${openapiFileName}")
val result = ExternalCommand.run(
s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f ${openapiFormat} -o ${config
.inputPath.pathAsString}${java.io.File.separator}${openapiFileName}",
atomToolsWorkDir
)
result match
case Success(_) =>
println(s"${openapiFileName} created successfully.")
case Failure(exception) =>
config.language match
case "SCALA" | "TASTY" | "SBT" => Right("")
case _ =>
try
migrateAtomConfigToSliceConfig(config) match
case x: AtomConfig if config.exportAtom =>
println(s"Exporting the atom to the directory ${x.exportDir}")
config.exportFormat match
case "graphml" =>
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).gml(x.exportDir)
case _ =>
// Export all representations
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).dot(x.exportDir)
// Export individual representations
ag.method.internal.filterNot(_.name.startsWith("<")).filterNot(
_.name.startsWith("lambda")
).exportAllRepr(x.exportDir)
case _: DataFlowConfig =>
val dataFlowSlice = sliceCpg(ag).collect { case x: DataFlowSlice => x }
val atomDataFlowSliceJson =
dataFlowSlice.map(x =>
AtomDataFlowSlice(x, DataFlowGraph.buildFromSlice(x).paths).toJson
)
saveSlice(config.outputSliceFile, atomDataFlowSliceJson)
case u: UsagesConfig =>
saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson))
if u.extractEndpoints then
val openapiFileName =
sys.env.getOrElse("ATOM_TOOLS_OPENAPI_FILENAME", "openapi.generated.json")
val openapiFormat =
sys.env.getOrElse("ATOM_TOOLS_OPENAPI_FORMAT", "openapi3.1.0")
val atomToolsWorkDir =
sys.env.getOrElse(
"ATOM_TOOLS_WORK_DIR",
config.inputPath.parent.pathAsString
)
println(
s"Failed to run atom-tools. Use the atom container image or perform 'pip install atom-tools' and re-run this command. Exception: ${exception.getMessage}"
s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f ${openapiFormat} -o ${config
.inputPath.pathAsString}${java.io.File.separator}${openapiFileName}"
)
end if
case _: ReachablesConfig =>
saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson))
case x: AtomParseDepsConfig =>
parseDependencies(ag).map(_.toJson) match
case Left(err) => return Left(err)
case Right(slice) => saveSlice(x.outputSliceFile, Option(slice))
case _ =>
end match
Right("Atom sliced successfully")
catch
case err: Throwable if err.getMessage == null =>
Left(err.getStackTrace.take(7).mkString("\n"))
case err: Throwable => Left(err.getMessage)
end try
val result = ExternalCommand.run(
s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f ${openapiFormat} -o ${config
.inputPath.pathAsString}${java.io.File.separator}${openapiFileName}",
atomToolsWorkDir
)
result match
case Success(_) =>
println(s"${openapiFileName} created successfully.")
case Failure(exception) =>
println(
s"Failed to run atom-tools. Use the atom container image or perform 'pip install atom-tools' and re-run this command. Exception: ${exception.getMessage}"
)
end if
case _: ReachablesConfig =>
saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson))
case x: AtomParseDepsConfig =>
parseDependencies(ag).map(_.toJson) match
case Left(err) => return Left(err)
case Right(slice) => saveSlice(x.outputSliceFile, Option(slice))
case _ =>
end match
Right("Atom sliced successfully")
catch
case err: Throwable if err.getMessage == null =>
Left(err.getStackTrace.take(7).mkString("\n"))
case err: Throwable => Left(err.getMessage)
end try
end match
end generateSlice

private def saveSlice(outFile: File, programSlice: Option[String]): Unit =
Expand Down Expand Up @@ -513,6 +522,32 @@ object Atom:
)
.withOutputPath(outputAtomFile)
)
case "SCALA" | "TASTY" | "SBT" =>
val workDir =
sys.env.getOrElse("ATOM_SCALASEM_WORK_DIR", config.inputPath.pathAsString)
val result = ExternalCommand.run(
s"scalasem ${workDir} ${config.outputSliceFile.pathAsString}",
workDir
)
result match
case Success(_) =>
println(
s"Semantic slices file '${config.outputSliceFile.pathAsString}' created successfully."
)
case Failure(exception) =>
println(
s"Failed to run scalasem. Use the atom container image and re-run this command. Exception: ${exception.getMessage}"
)
new Jimple2Cpg()
.createCpgWithOverlays(
JimpleConfig(scalaSdk = Option(System.getProperty("java.class.path")))
.withInputPath(config.inputPath.pathAsString)
.withOutputPath(outputAtomFile)
.withFullResolver(true)
.withOnlyClasses(true)
.withDepth(1)
.withRecurse(true)
)
case Languages.JSSRC | Languages.JAVASCRIPT | "JS" | "TS" | "TYPESCRIPT" =>
new JsSrc2Cpg()
.createCpgWithOverlays(
Expand Down Expand Up @@ -605,30 +640,38 @@ object Atom:
case Failure(exception) =>
Left(exception.getMessage)
case Success(ag) =>
config match
case x: AtomConfig
if x.dataDeps || x.isInstanceOf[AtomDataFlowConfig] || x.isInstanceOf[
AtomReachablesConfig
] =>
println("Generating data-flow dependencies from atom. Please wait ...")
// Enhance with simple and easy tags
new EasyTagsPass(ag).createAndApply()
// Enhance with the BOM from cdxgen
new CdxPass(ag).createAndApply()
new ChennaiTagsPass(ag).createAndApply()
new OssDataFlow(new OssDataFlowOptions(maxNumberOfDefinitions =
x.maxNumDef
))
.run(new LayerCreatorContext(ag))
language match
case "SCALA" | "TASTY" | "SBT" =>
try
ag.close()
catch
case err: Throwable => Left(err.getMessage)
Right("Semantic slices generated successfully.")
case _ =>
generateSlice(config, ag)
try
ag.close()
catch
case err: Throwable if err.getMessage == null =>
Left(err.getStackTrace.take(7).mkString("\n"))
case err: Throwable => Left(err.getMessage)
Right("Atom generation successful")
config match
case x: AtomConfig
if x.dataDeps || x.isInstanceOf[AtomDataFlowConfig] || x.isInstanceOf[
AtomReachablesConfig
] =>
println("Generating data-flow dependencies from atom. Please wait ...")
// Enhance with simple and easy tags
new EasyTagsPass(ag).createAndApply()
// Enhance with the BOM from cdxgen
new CdxPass(ag).createAndApply()
new ChennaiTagsPass(ag).createAndApply()
new OssDataFlow(new OssDataFlowOptions(maxNumberOfDefinitions =
x.maxNumDef
))
.run(new LayerCreatorContext(ag))
case _ =>
generateSlice(config, ag)
try
ag.close()
catch
case err: Throwable if err.getMessage == null =>
Left(err.getStackTrace.take(7).mkString("\n"))
case err: Throwable => Left(err.getMessage)
Right("Atom generation successful")
end match
end generateForLanguage

Expand Down
Loading

0 comments on commit d34eef5

Please sign in to comment.