Skip to content

Commit

Permalink
JS reachables (#21)
Browse files Browse the repository at this point in the history
* JS reachables

Signed-off-by: Prabhu Subramanian <[email protected]>

* Workflow

Signed-off-by: Prabhu Subramanian <[email protected]>

---------

Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu authored Oct 22, 2023
1 parent 5cb9be7 commit 596c85e
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
- name: Install and test
run: |
npm install -g @appthreat/atom
python3 -m pip install --upgrade pip
python3 -m pip install --upgrade pip setuptools wheel
python3 -m pip install poetry
python3 -m poetry config virtualenvs.create false
python3 -m poetry install --no-cache
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- name: Install and test
run: |
npm install -g @appthreat/atom
python -m pip install --upgrade pip
python -m pip install --upgrade pip setuptools wheel
pip install poetry
sudo apt install -y graphviz-dev
poetry config virtualenvs.create false
Expand All @@ -59,7 +59,7 @@ jobs:
poetry publish --build --username $PYPI_USERNAME --password $PYPI_PASSWORD
$CONDA/bin/conda update -n base -c defaults conda
$CONDA/bin/conda install anaconda-client conda-build
$CONDA/bin/conda build -c conda-forge --output-folder ./conda-out/ chenpy
$CONDA/bin/conda build -c conda-forge --output-folder ./conda-out/ .
anaconda upload --label main -u appthreat ./conda-out/noarch/*.tar.bz2
env:
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
Expand Down
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# chen

Code Hierarchy Exploration Net (chen) is an advanced exploration toolkit for your application source code and its dependency hierarchy.
Code Hierarchy Exploration Net (chen) is an advanced exploration toolkit for your application source code and its dependency hierarchy. This repo contains the source code for the chen library and an advanced REPL console called chennai (chen not AI).

## Requirements

Expand All @@ -9,10 +9,6 @@ Code Hierarchy Exploration Net (chen) is an advanced exploration toolkit for you
- Node.js > 16 (To run [atom](https://github.com/AppThreat/atom))
- Minimum 16GB RAM

### Additional requirements

- Rust (For rocksdb-py compilation)

## Getting started

chen container image has everything needed to get started.
Expand Down Expand Up @@ -190,9 +186,9 @@ This error is mostly due to missing python .so (linux), .dll (windows) or .dylib
chen is a fork of the popular [joern](https://github.com/joernio/joern) project. We deviate from the joern project in the following ways:
- Make code analysis accessible by adding first-class integration with Python and frameworks such as NetworkX and PyTorch
- Make code analysis accessible by adding first-class integration with Python and frameworks such as NetworkX and PyTorch.
- Enable broader hierarchical analysis (Application + Dependency + Container + OS layer)
- By creating a more welcoming community more appropriate for beginner users with great support
- Create a welcoming community that is better suited to beginner users and backed by great enterprise support. We want to democratize code analysis.
## License
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "chen"
ThisBuild / organization := "io.appthreat"
ThisBuild / version := "0.0.19"
ThisBuild / version := "0.0.20"
ThisBuild / scalaVersion := "3.3.1"

val cpgVersion = "1.4.22"
Expand Down
79 changes: 0 additions & 79 deletions chenpy/meta.yaml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,17 @@ object Path {
case call: Call =>
if (!call.code.startsWith("<operator") || !call.methodFullName.startsWith("<operator")) {
if (
tags.isEmpty && call.callee(NoResolve).head.isExternal && !call.methodFullName.startsWith(
"<operator"
) && !call.name
tags.isEmpty && call.callee(NoResolve).nonEmpty && call
.callee(NoResolve)
.head
.isExternal && !call.methodFullName.startsWith("<operator") && !call.name
.startsWith("<operator") && !call.methodFullName.startsWith("new ")
) {
tags = tagAsString(call.callee(NoResolve).head.tag)
}
var callIcon =
if (
call.callee(NoResolve).head.isExternal && !call.name
call.callee(NoResolve).nonEmpty && call.callee(NoResolve).head.isExternal && !call.name
.startsWith("<operator") && !call.methodFullName.startsWith("new ")
) " :right_arrow_curving_up:"
else ""
Expand Down Expand Up @@ -176,7 +177,13 @@ object Path {
if (!trow(4).startsWith("<operator>.fieldAccess")) {
val tagsStr: String = if (trow(4).nonEmpty) s"Tags: ${trow(4)}" else ""
val methodStr = s"${trow(1)}\n${tagsStr}"
table.add_row(trow(0), methodStr.stripMargin, trow(2), trow(3), end_section = end_section)
table.add_row(
trow(0),
methodStr.stripMargin,
trow(2),
trow(3).takeWhile(_ != '\n'),
end_section = end_section
)
}
}
}
Expand Down
56 changes: 56 additions & 0 deletions meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{% set version = "0.0.20" %}

package:
name: chen
version: {{ version }}
about:
home: https://github.com/appthreat/chen
license: Apache-2.0
license_file: LICENSE
summary: "Code Hierarchy Exploration Net (chen)"
dev_url: https://github.com/appthreat/chen
requirements:
build:
- python
- pip
host:
- pip
- python
- setuptools
- poetry
run:
- python>=3.8.1,<3.12
- conda-forge::httpx>=0.24.1,<0.25.0
- conda-forge::websockets>=11.0.2,<12.0.0
- conda-forge::orjson>=3.9.0,<4.0.0
- conda-forge::rich>=13.4.1,<14.0.0
- conda-forge::oras-py>=0.1.25
- conda-forge::appdirs>=1.4.4,<2.0.0
- conda-forge::psutil>=5.9.5,<6.0.0
- conda-forge::packageurl-python>=0.11.2,<0.12.0
- conda-forge::gitpython>=3.1.37,<4.0.0
- conda-forge::networkx>=3.1
- conda-forge::numpy>=1.26
- conda-forge::scipy>=1.11.3
build:
pin_depends: record
noarch: python
script: |
cp ${RECIPE_DIR}/pyproject.toml ${SRC_DIR}/
cp ${RECIPE_DIR}/LICENSE ${SRC_DIR}/
cp ${RECIPE_DIR}/README.md ${SRC_DIR}/
cp ${RECIPE_DIR}/docker-compose.yml ${SRC_DIR}/
cp -rf ${RECIPE_DIR}/docs ${SRC_DIR}/
cp -rf ${RECIPE_DIR}/notebooks ${SRC_DIR}/
rm -rf ${SRC_DIR}/{ci,console,dataflowengineoss,macros,platform,project,scripts,semanticcpg,target,tests,workspace}
{{ PYTHON }} -m pip install --no-build-isolation --no-deps --ignore-installed .
entry_points:
- chen = chenpy.cli:main

source:
path: ./chenpy
folder: chenpy

extra:
maintainers:
- Team AppThreat <[email protected]>
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ import scala.io.Source

/** Creates tags on typeDecl and call nodes based on a cdx document
*/
class CdxPass(cpg: Cpg) extends CpgPass(cpg) {
class CdxPass(atom: Cpg) extends CpgPass(atom) {

val language: String = cpg.metaData.language.head
val language: String = atom.metaData.language.head

// Number of tags needed
private val TAGS_COUNT: Int = 2
Expand All @@ -27,16 +27,34 @@ class CdxPass(cpg: Cpg) extends CpgPass(cpg) {
// tags list as a seed
// Seed vocabulary of tags, one entry per line of the bundled `tags-vocab.txt` resource.
// The Source is closed as soon as its lines have been materialised, so the underlying
// stream is not leaked for the lifetime of the pass.
private val keywords: List[String] =
  scala.util.Using.resource(Source.fromResource("tags-vocab.txt"))(_.getLines().toList)

// FIXME: Replace these with semantic fingerprints
/** Regular expressions describing JavaScript code that reads from a request-like object
  * (`req`, `ctx` or `context`). Calls whose code matches are tagged `framework-input` by `run`.
  *
  * Every pattern is case-insensitive (`(?i)`) and lets `.` span line breaks (`(?s)`).
  * A fresh array is built on each access.
  */
private def JS_REQUEST_PATTERNS: Array[String] = {
  // <receiver>.<property> appearing anywhere in the code string
  val requestPropertyAccess =
    "(?s)(?i).*(req|ctx|context)\\.(originalUrl|path|protocol|route|secure|signedCookies|stale|subdomains|xhr|app|pipe|file|files|baseUrl|fresh|hostname|ip|url|ips|method|body|param|params|query|cookies|request).*"
  Array(requestPropertyAccess)
}

/** Regular expressions describing JavaScript code that writes to a response-like object,
  * touches a database handle, or ends in a sink-like verb. Calls whose code matches are
  * tagged `framework-output` by `run`.
  *
  * Every pattern is case-insensitive (`(?i)`) and lets `.` span line breaks (`(?s)`).
  * A fresh array is built on each access.
  */
private def JS_RESPONSE_PATTERNS: Array[String] = {
  // <receiver>.<response method> appearing anywhere in the code string
  val responseMethodAccess =
    "(?s)(?i).*(res|ctx|context)\\.(append|attachment|body|cookie|download|end|format|json|jsonp|links|location|redirect|render|send|sendFile|sendStatus|set|vary).*"
  // header manipulation on `res`
  val responseHeaderAccess = "(?s)(?i).*res\\.(set|writeHead|setHeader).*"
  // anything mentioning a database-like name
  val databaseMention = "(?s)(?i).*(db|dao|mongo|mongoclient).*"
  // note: no trailing `.*` here, unlike the other patterns
  val trailingSinkVerb =
    "(?s)(?i).*(\\s|\\.)(list|create|upload|delete|execute|command|invoke|submit|send)"
  Array(responseMethodAccess, responseHeaderAccess, databaseMention, trailingSinkVerb)
}

/** Reports whether `str` should be treated as a regular expression rather than an exact name.
  *
  * Only the `*` wildcard is taken as the regex marker. The former extra clause
  * `Pattern.quote(str) == str` was dead code: `Pattern.quote` always wraps its input in
  * `\Q...\E`, so the quoted text can never equal the input. Dropping it leaves the result
  * unchanged for every input.
  *
  * @param str candidate package name or pattern
  * @return `true` when `str` contains a `*`
  */
private def containsRegex(str: String): Boolean = str.contains("*")

// Regex selecting the BOM/cdx JSON config files processed by run(), e.g. "bom.json" or
// "app.cdx.json". The dot before the extension is escaped so it matches only a literal '.'
// instead of any character.
private val BOM_JSON_FILE = ".*(bom|cdx)\\.json"

override def run(dstGraph: DiffGraphBuilder): Unit = {
cpg.configFile.name(BOM_JSON_FILE).content.foreach { cdxData =>
atom.configFile.name(BOM_JSON_FILE).content.foreach { cdxData =>
val cdxJson = parse(cdxData).getOrElse(Json.Null)
val cursor: HCursor = cdxJson.hcursor
val components = cursor.downField("components").focus.flatMap(_.asArray).getOrElse(Vector.empty)
val donePkgs = mutable.Map[String, Boolean]()
if (language == Languages.JSSRC || language == Languages.JAVASCRIPT) {
JS_REQUEST_PATTERNS.foreach(p => atom.call.code(p).newTagNode("framework-input").store()(dstGraph))
JS_RESPONSE_PATTERNS.foreach(p => atom.call.code(p).newTagNode("framework-output").store()(dstGraph))
}
components.foreach { comp =>
val PURL_TYPE = "purl"
val compPurl = comp.hcursor.downField(PURL_TYPE).as[String].getOrElse("")
Expand All @@ -47,7 +65,7 @@ class CdxPass(cpg: Cpg) extends CpgPass(cpg) {
properties.foreach { ns =>
val nsstr = ns.hcursor.downField("value").as[String].getOrElse("")
nsstr
.split("\n")
.split("(\n|,)")
.filterNot(_.startsWith("java."))
.filterNot(_.startsWith("com.sun"))
.filterNot(_.contains("test"))
Expand All @@ -56,50 +74,61 @@ class CdxPass(cpg: Cpg) extends CpgPass(cpg) {
var bpkg = pkg.takeWhile(_ != '$')
if (language == Languages.JAVA || language == Languages.JAVASRC)
bpkg = bpkg.split("\\.").take(PKG_NS_SIZE).mkString(".").concat(".*")
if (language == Languages.JSSRC || language == Languages.JAVASCRIPT) bpkg = s".*${bpkg}.*"
if (!donePkgs.contains(bpkg)) {
donePkgs.put(bpkg, true)
if (!containsRegex(bpkg)) {
cpg.call.typeFullNameExact(bpkg).newTagNode(compPurl).store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(compPurl).store()(dstGraph)
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compPurl).store()(dstGraph)
atom.call.typeFullNameExact(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.method.parameter.typeFullNameExact(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compPurl).store()(dstGraph)
} else {
cpg.call.typeFullName(bpkg).newTagNode(compPurl).store()(dstGraph)
cpg.method.parameter.typeFullName(bpkg).newTagNode(compPurl).store()(dstGraph)
cpg.method.fullName(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.call.typeFullName(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.method.parameter.typeFullName(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.method.fullName(bpkg).newTagNode(compPurl).store()(dstGraph)
if (language == Languages.JSSRC || language == Languages.JAVASCRIPT) {
atom.call.code(bpkg).argument.newTagNode(compPurl).store()(dstGraph)
atom.identifier.code(bpkg).newTagNode(compPurl).store()(dstGraph)
atom.identifier.code(bpkg).inCall.newTagNode(compPurl).store()(dstGraph)
}
}
if (compType != "library") {
if (!containsRegex(bpkg)) {
cpg.call.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
cpg.call.typeFullNameExact(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compType).store()(dstGraph)
atom.call.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
atom.call.typeFullNameExact(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph)
atom.method.parameter.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
atom.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compType).store()(dstGraph)
} else {
cpg.call.typeFullName(bpkg).newTagNode(compType).store()(dstGraph)
cpg.call.typeFullName(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph)
cpg.method.parameter.typeFullName(bpkg).newTagNode(compType).store()(dstGraph)
cpg.method.fullName(bpkg).newTagNode(compType).store()(dstGraph)
atom.call.typeFullName(bpkg).newTagNode(compType).store()(dstGraph)
atom.call.typeFullName(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph)
atom.method.parameter.typeFullName(bpkg).newTagNode(compType).store()(dstGraph)
atom.method.fullName(bpkg).newTagNode(compType).store()(dstGraph)
if (language == Languages.JSSRC || language == Languages.JAVASCRIPT) {
atom.call.code(bpkg).argument.newTagNode(compType).store()(dstGraph)
atom.identifier.code(bpkg).newTagNode(compType).store()(dstGraph)
atom.identifier.code(bpkg).inCall.newTagNode(compType).store()(dstGraph)
}
}
}
if (compType == "framework") {
def frameworkAnnotatedMethod = cpg.annotation
def frameworkAnnotatedMethod = atom.annotation
.fullName(bpkg)
.method

frameworkAnnotatedMethod.parameter
.newTagNode(s"$compType-input")
.store()(dstGraph)
cpg.ret
atom.ret
.where(_.method.annotation.fullName(bpkg))
.newTagNode(s"$compType-output")
.store()(dstGraph)
}
descTags.foreach { t =>
cpg.call.typeFullName(bpkg).newTagNode(t).store()(dstGraph)
cpg.method.parameter.typeFullName(bpkg).newTagNode(t).store()(dstGraph)
atom.call.typeFullName(bpkg).newTagNode(t).store()(dstGraph)
atom.method.parameter.typeFullName(bpkg).newTagNode(t).store()(dstGraph)
if (!containsRegex(bpkg)) {
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(t).store()(dstGraph)
atom.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(t).store()(dstGraph)
} else {
cpg.method.fullName(bpkg).newTagNode(t).store()(dstGraph)
atom.method.fullName(bpkg).newTagNode(t).store()(dstGraph)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "appthreat-chen"
version = "0.0.19"
version = "0.0.20"
description = "Code Hierarchy Exploration Net (chen)"
authors = ["Team AppThreat <[email protected]>"]
license = "Apache-2.0"
Expand Down

0 comments on commit 596c85e

Please sign in to comment.