diff --git a/.github/actions/install-llvm/action.yml b/.github/actions/install-llvm/action.yml new file mode 100644 index 0000000000..0f87cd1615 --- /dev/null +++ b/.github/actions/install-llvm/action.yml @@ -0,0 +1,23 @@ +name: 'Install LLVM' +inputs: + version: + required: true + +runs: + using: "composite" + steps: + - name: Run install script + shell: bash + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh ${{ inputs.version }} + sudo ln -sf $(which clang-${{inputs.version}}) /usr/bin/clang + sudo ln -sf $(which llvm-config-${{inputs.version}}) /usr/bin/llvm-config + - name: Test version + shell: bash + run: | + ls -l $(which clang) + ls -l $(which llvm-config) + clang --version + llvm-config --version \ No newline at end of file diff --git a/.github/workflows/linux-build-test-deploy.yml b/.github/workflows/linux-build-test-deploy.yml index 46faa48f00..65d2dd34a4 100644 --- a/.github/workflows/linux-build-test-deploy.yml +++ b/.github/workflows/linux-build-test-deploy.yml @@ -61,6 +61,10 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 + - name: Install LLVM and Clang + uses: ./.github/actions/install-llvm + with: + version: "15" - name: Run tests uses: ./.github/actions/test-action - name: Positive outcome badge diff --git a/.github/workflows/sonar.yml b/.github/workflows/sonar.yml index dd6a760f73..40f78b29f6 100644 --- a/.github/workflows/sonar.yml +++ b/.github/workflows/sonar.yml @@ -15,6 +15,10 @@ jobs: with: distribution: temurin java-version: 17 + - name: Install LLVM and Clang + uses: ./.github/actions/install-llvm + with: + version: "15" - name: Analyze env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/build.gradle.kts b/build.gradle.kts index 4ba8399254..c704f78fe9 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -57,7 +57,7 @@ tasks { } val reportTasks = subprojects.mapNotNull { subproject -> - subproject.tasks.named("jacocoTestReport", 
JacocoReport::class).orNull + subproject.tasks.findByName("jacocoTestReport")?.let { it as JacocoReport } } dependsOn(reportTasks.flatMap { it.dependsOn }) diff --git a/lib/.gitignore b/lib/.gitignore index 3aa6f3e656..2b8f38f42f 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -1,3 +1,3 @@ /msvcp110.dll /msvcr110.dll -/vcomp110.dll \ No newline at end of file +/vcomp110.dll diff --git a/lib/libtheta-llvm.so b/lib/libtheta-llvm.so new file mode 100755 index 0000000000..85eb353bb0 Binary files /dev/null and b/lib/libtheta-llvm.so differ diff --git a/scripts/reformat.sh b/scripts/reformat.sh old mode 100644 new mode 100755 diff --git a/settings.gradle.kts b/settings.gradle.kts index 1a5e96db2d..c60c6fe932 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -23,6 +23,7 @@ include( "frontends/c-frontend", "frontends/chc-frontend", + "frontends/llvm", "cfa/cfa", "cfa/cfa-analysis", @@ -37,6 +38,7 @@ include( "xcfa/exec-graph-cli", "xcfa/c2xcfa", "xcfa/litmus2xcfa", + "xcfa/llvm2xcfa", "xcfa/xcfa-analysis", "xcfa/xcfa-cli", "xcfa/litmus-cli", diff --git a/subprojects/frontends/llvm/README.md b/subprojects/frontends/llvm/README.md new file mode 100644 index 0000000000..20e54f117a --- /dev/null +++ b/subprojects/frontends/llvm/README.md @@ -0,0 +1,44 @@ +# Theta C Frontend +This repository contains a library, which enables Theta to parse C programs into the XCFA or CFA format using LLVM. + +# Motivation +Trying to directly transform a C program into a formal representation comes with several problems due to the complexity of the C language. Using an assembly-like internal representation, such as LLVM IR can largely decrease this complexity. + +Currently there is no official LLVM Java API, thus a C++ project is necessary to handle the parsing of LLVM bytecode. + +# Connection with Theta +[Link to Theta](https://github.com/ftsrg/theta) + +Theta is a configurable and modular, CEGAR-based verification framework. 
It is capable of handling several formalisms, including Control Flow Automaton *(CFA)* and an extended version of CFA, called XCFA. *(The latter can handle functions and (concurrent) processes.)* + +Theta has a CLI tool and a module for parsing CFA/XCFA files and can convert an XCFA to a CFA, if it is possible *(if it has only one function and one process)*, but has no capability to transform C programs or any other representation of them to these formalisms. + +# Connection with Gazer +[Link to Gazer](https://github.com/ftsrg/gazer) + +Gazer is a BMC verification tool and Theta frontend written in C++, using the LLVM framework. It handles the transformation to CFA from the C program by itself with an intermediate CFA representation used in its BMC engine. It works as a literal frontend, a CLI tool which handles the transformation and which can call Theta for analysis, parsing and using its output to get a counterexample. + +Gazer is the predecessor of this library. Many of its techniques and implementation details are used in this library, but this project has a different, simpler approach. It aims to carry out only a fraction of Gazer's functionality, namely it has no BMC engine, so it cannot carry out software verification and it will not output a CFA or an XCFA in any way. It carries out only the transformation of a C program to an intermediate representation containing the necessary information for the construction of an XCFA, adding some LLVM passes on the way. + +As opposed to Gazer, this library approaches its usage in a reversed manner, being called and used by Theta as a native C++ library, not the other way around. Its long-term goal is to be a fairly static library, so that extensions and implementation of new features related to the formalisms or the analysis can happen mainly in Theta, as it has better maintainability and is actively developed. 
+ +# Transforming the program +![architecture](doc/theta-c-arch.png) +*The red parts are modules/classes of Theta, the green ones are of this project and the blue ones are yet to be implemented - they are not properly connected yet, as their place in the project isn't certain yet* + +## Input +The input file can either be bitcode (`.ll` or `.bc`, LLVM passes are not used in this case) or a C program (a single `.c` or `.i` file). A linking step for more than one input file is not yet implemented. *(But it is an important feature for the future.)* + +## Compilation (clang) +This is a simple step - we just call clang with a few flags to compile the C program to bitcode. We use O0, as most optimization passes that we benefit from are executed in the next step. + +## Documentation of the remaining steps +[Our intermediate representation](doc/intermediate-representation.md) + +[Passes and optimizations](doc/passes.md) + +[JNI interface](doc/jni-interface.md) + +[Analysis in the frontend](doc/analysis.md) + +[A simple example from input program to xcfa](doc/simple-example.md) diff --git a/subprojects/frontends/llvm/build.gradle.kts b/subprojects/frontends/llvm/build.gradle.kts new file mode 100644 index 0000000000..83332b8116 --- /dev/null +++ b/subprojects/frontends/llvm/build.gradle.kts @@ -0,0 +1,96 @@ +/* + * Copyright 2023 Budapest University of Technology and Economics + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.gradle.internal.os.OperatingSystem.current +import java.io.ByteArrayOutputStream +import java.io.IOException + +plugins { + id("cpp-library") +} + +val enabled = current().isLinux && + try { + runCommandForOutput("llvm-config") + true + } catch (e: IOException) { + println("LLVM not installed, not building native library.") + false + } + +fun runCommandForOutput(vararg args: String): Array<String> { // runs the command and returns its stdout split into whitespace-separated tokens + val process = ProcessBuilder(*args).start() + val outputStream = ByteArrayOutputStream() + process.inputStream.copyTo(outputStream) + process.waitFor() + val ret = outputStream.toString() + .trim() + .split(Regex("\\s+")) // output may span several lines (e.g. one per llvm-config option), so split on any whitespace, not only spaces + .filter { it.length > 1 } + .map { it.trim() } + .toTypedArray() + return ret +} + +fun llvmConfigFlags(vararg args: String): Array<String> { // flags reported by llvm-config for the given options; empty when the native build is disabled + if (!enabled) return arrayOf() + return try { + runCommandForOutput("llvm-config", *args) + } catch (e: IOException) { + e.printStackTrace() + arrayOf() + }.also { println("LLVM flags (${args.toList()}): ${it.toList()}") } +} + +fun jniConfigFlags(): Array<String> { // -I flags for the JDK's JNI headers, located relative to the resolved javac binary + if (!enabled) return arrayOf() + val jdkHomeArr = runCommandForOutput("bash", "-c", "dirname \$(cd \$(dirname \$(readlink \$(which javac) || which javac)); pwd -P)") + check(jdkHomeArr.size == 1) + val jdkHome = File(jdkHomeArr[0]) + check(jdkHome.exists()) + val mainInclude = jdkHome.resolve("include") + val linuxInclude = mainInclude.resolve("linux") + return arrayOf( + "-I${mainInclude.absolutePath}", + "-I${linuxInclude.absolutePath}", + ).also { println("JNI flags: ${it.toList()}") } +} + +library { + targetMachines.add(machines.linux.x86_64) + tasks.withType(CppCompile::class) { + compilerArgs.addAll(listOf( + "-Wall", + "-fpic", + *jniConfigFlags(), + *llvmConfigFlags("--cxxflags"))) + onlyIf { + println("CppCompile is enabled: $enabled") + this@Build_gradle.enabled + } + } + + tasks.withType(LinkSharedLibrary::class) { + linkerArgs.addAll(listOf( + "-rdynamic", + *llvmConfigFlags("--cxxflags", "--ldflags", "--libs", "core", "bitreader"), + "-ldl")) 
+ onlyIf { + println("LinkSharedLibrary is enabled: $enabled") + this@Build_gradle.enabled + } + } +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/doc/analysis.md b/subprojects/frontends/llvm/doc/analysis.md new file mode 100644 index 0000000000..e9af4a0144 --- /dev/null +++ b/subprojects/frontends/llvm/doc/analysis.md @@ -0,0 +1,20 @@ +## What analysis steps mean in this case +These are simple "checks", executed while the IR is parsed into our representation (so we won't need to explicitly iterate through it twice). + +### Struct check +Using an `llvm::TypeFinder`, we check if there are any struct (or union) type definitions in the module. This information is then stored and can be queried. + +### Logical operator check +This check is called for each instruction. The goal of this check is to decide whether Theta can use integer arithmetics or only the more costly bitwise arithmetics. + +#### How it works +Steps executed on each instruction: +- Is it an `and/or/xor` operation? + - If **yes**, is it only used later in `icmp` operations with 0 as the other operand? + - If **yes**, *integer arithmetics* are suitable + - If **no**, we'll need *bitwise arithmetics* +- Else, is it a shift (`shl/lshr/ashr`) operation? + - If **yes**, we'll need *bitwise arithmetics* + - If **no**, *integer arithmetics* are suitable + +*Clarification: this is a global attribute in the sense that if any instruction requires bitwise arithmetics, then the whole program has to be handled that way.* diff --git a/subprojects/frontends/llvm/doc/intermediate-representation.md b/subprojects/frontends/llvm/doc/intermediate-representation.md new file mode 100644 index 0000000000..3b249ded38 --- /dev/null +++ b/subprojects/frontends/llvm/doc/intermediate-representation.md @@ -0,0 +1,10 @@ +## Intermediate representation between LLVM IR and the XCFA models +As we can see in the architecture, the C input program is compiled into LLVM IR. 
LLVM intermediate representation is an assembly-like representation with around 60 instructions and lots of possibilities for adding metadata. + +It can be easily parsed through the LLVM API, but contains a large quantity of complex information, from which we only need to extract particular elements required for the XCFA and the projection of counterexamples on to the original program. To accomplish this we use a simpler representation, which is in many points similar to the programmatic representation of LLVM IR, but differs in some particular places and contains no superfluous data (from our standpoint). + +The classes for this representation can be found in the [types](https://github.com/ftsrg/theta-c-frontend/tree/master/src/types) directory. + +It is similar to LLVM IR in that there is a module, which contains global variables and functions. Functions contain basic blocks, basic blocks are made of instructions, which have operands. It differs in that there are no explicit metadata classes, rather the above mentioned classes contain metadata about themselves. + +Furthermore on instruction level the LLVM IR has no classes explicitly representing (virtual) registers and instruction operands - these are handled as instructions, constants or other appropriate types. In our representation instruction contains Operands, an abstract class and parent class of Register, BasicBlockOperand and StringOperand. The latter is used to handle function operands in `call` instructions and special strings like the condition code in `icmp` operations. 
diff --git a/subprojects/frontends/llvm/doc/jni-interface.md b/subprojects/frontends/llvm/doc/jni-interface.md new file mode 100644 index 0000000000..4c7af9f1a1 --- /dev/null +++ b/subprojects/frontends/llvm/doc/jni-interface.md @@ -0,0 +1,25 @@ +## JNI Interface +*"In software design, the Java Native Interface (JNI) is a foreign function interface programming framework that enables Java code running in a Java virtual machine (JVM) to call and be called by native applications (programs specific to a hardware and operating system platform) and libraries written in other languages such as C, C++ and assembly."* (from [Wikipedia](https://en.wikipedia.org/wiki/Java_Native_Interface)) + +This project behaves as a native library of the Theta framework and communicates through a JNI interface. Although it is possible to send objects through a native interface, we are currently not using that feature. + +The easiest way to get familiar with the interface is to check out the C++ side [here](https://github.com/ftsrg/theta-c-frontend/blob/master/src/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.h) and the Java side in Theta (`LlvmIrProvider` class of the XCFA subproject). + +### Configuring passes and parsing the input program +Currently there are four switches to configure pass groups (the rest of the passes are mandatory). +- Function inlining (we do not inline global variables) +- Cleanup passes +- Optimizations +- Debug printing pass + +After configuration the native function `JniParseIr` has to be called to let the frontend iterate through the LLVM IR. + +### Querying the resulting data +The data can be queried based on these groups: +- global variables +- functions + - basic blocks + - instructions + +The number of these in a given parent and their attributes can be queried one by one. +Furthermore, analysis results can be queried as well. 
diff --git a/subprojects/frontends/llvm/doc/passes.md b/subprojects/frontends/llvm/doc/passes.md new file mode 100644 index 0000000000..bd2e3c86b6 --- /dev/null +++ b/subprojects/frontends/llvm/doc/passes.md @@ -0,0 +1,22 @@ +## Passes used +### About LLVM passes in general +Passes provide a system to run optimization, transformation and analysis steps on LLVM IR. These can be LLVM's official passes, but also custom passes. This project uses both. +[more on LLVM passes](https://llvm.org/docs/Passes.html) + +This part of the project adopts the most from Gazer. LLVM and custom passes are used there similarly as well, although Gazer has a more complex and less naive approach, e.g. using early and late optimization sets. In this project we are approaching the problems and functionality in a direct manner, thus we have only a simpler, single execution line of passes. + +### What are we using passes for +1. Optimizations +2. Transformations +3. Annotations + +*Disclaimer: this part of the project isn't static, as we are constantly experimenting with changing/adding new passes. Due to this, some important custom passes are described here, but to get the precise state of what passes we use and what they do, start from [this](https://github.com/ftsrg/theta-c-frontend/blob/master/src/utilities/CPipeline.cpp) source file.* +### Custom passes +- **Toposort pass** + Strongly connected components (e.g. loops) of basic blocks aren't necessarily in topological order in LLVM IR. This pass sorts them, as this way it is possible to iterate through the blocks in order and have unknown values only in phi nodes, which makes the model transformation in Theta simpler. +- **Eliminate GEP instructions pass** + The getelementptr instruction is a special instruction in LLVM IR, which usually pairs with one or more store and load instructions to get/set memory values. In the XCFA this instruction pair is handled as a single instruction on a single edge, but transforming to this format is complex. 
Instead of that we use this transformation pass to combine the information from these pairs into a single `theta.dbg.getArrayElement_typename` or `theta.dbg.setArrayElement_typename` function calls while removing the `getelementptr` and `load/store` instructions. *Support in this pass is currently incomplete, as there can be special parametrizations in these instructions when structs or unions are used, so that has to be added when extending the frontend with struct support in the future.* +- **Eliminate Phi nodes pass** + When verifying a model with CEGAR, the number of variables can be a crucial point. Phi nodes can basically add a variable per each node, although it wasn't present in the original program and wouldn't be necessary. This pass eliminates these unnecessary variables, where possible and uses a global variable with unique identifiers to store these values instead. +- **Branch dbg call pass** *(work in progress)* + This pass adds a function called `theta.dbg.control` before each `br` *(branch)* instruction. On one hand, adding metadata to this call will make it easier to add *control* information to witnesses, on the other hand it tones down the CFG simplification pass, which is an important optimization, but can also be quite aggressive when merging control flow branches with which we lose *(or even get wrong)* program line metadata. False information on program lines can lead to unusable witnesses. 
diff --git a/subprojects/frontends/llvm/doc/simple-example.md b/subprojects/frontends/llvm/doc/simple-example.md new file mode 100644 index 0000000000..d35c76d922 --- /dev/null +++ b/subprojects/frontends/llvm/doc/simple-example.md @@ -0,0 +1,94 @@ +## A simple example +### Input +Let's give a simple example program as input: +``` C +#include <stdio.h> + +int main() { + int a; + scanf("%d", &a); + + int b = 0; + b = a+2; + return b; +} +``` + +### LLVM IR +After compilation with clang and executing the passes, we get the following LLVM IR: +*(Note: LLVM IR is rather verbose, so some attributes have been cut out from the middle, as they are insignificant in this example)* +```llvm +; ModuleID = 'example.bc' +source_filename = "example.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 + +; Function Attrs: noinline nounwind sspstrong uwtable +define dso_local i32 @main() local_unnamed_addr #0 !dbg !9 { + %lastblock = alloca i64, align 8 + %1 = alloca i32, align 4 + call void @llvm.dbg.value(metadata i32* %1, metadata !14, metadata !DIExpression(DW_OP_deref)), !dbg !15 + call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* nonnull %1) #3, !dbg !16 + call void @llvm.dbg.value(metadata i32 0, metadata !17, metadata !DIExpression()), !dbg !15 + %2 = load i32, i32* %1, align 4, !dbg !18 + call void @llvm.dbg.value(metadata i32 %2, metadata !14, metadata !DIExpression()), !dbg !15 + %3 = add nsw i32 %2, 2, !dbg !19 + call void @llvm.dbg.value(metadata i32 %3, metadata !17, metadata !DIExpression()), !dbg !15 + ret i32 %3, !dbg !20 +} + +declare i32 @__isoc99_scanf(i8*, ...) local_unnamed_addr #1 + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +(...) 
+ +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.1.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "/home/solarowl/Downloads/tmp/frontend-example/example.c", directory: "/home/solarowl/Research/Protos/theta-c-frontend") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!7 = !{i32 7, !"PIE Level", i32 2} +!8 = !{!"clang version 11.1.0"} +!9 = distinct !DISubprogram(name: "main", scope: !10, file: !10, line: 3, type: !11, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DIFile(filename: "Downloads/tmp/frontend-example/example.c", directory: "/home/solarowl") +!11 = !DISubroutineType(types: !12) +!12 = !{!13} +!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!14 = !DILocalVariable(name: "a", scope: !9, file: !10, line: 4, type: !13) +!15 = !DILocation(line: 0, scope: !9) +!16 = !DILocation(line: 5, column: 5, scope: !9) +!17 = !DILocalVariable(name: "b", scope: !9, file: !10, line: 7, type: !13) +!18 = !DILocation(line: 8, column: 9, scope: !9) +!19 = !DILocation(line: 8, column: 10, scope: !9) +!20 = !DILocation(line: 9, column: 5, scope: !9) + +``` +There are two important parts in the IR: +- the IR version of the main function, starting from `define dso_local i32 @main() local_unnamed_addr #0 !dbg !9 {` +- and the metadata in the second half - each line starting with a `!` and a number represents some kind of metadata, e.g. `!14 = !DILocalVariable(name: "a", scope: !9, file: !10, line: 4, type: !13)` tells us, that the register annotated with `!14` is connected to the local variable `a`. 
+ +We can also notice the SSA (static single assignment) nature of LLVM IR in this example: `!17` shows up in `main` more than once, as each time `b` has a new value assigned, a new register has to be created, as virtual registers in LLVM can only be assigned once. + +### Our intermediate representation +The customized representation we use has no textual syntax, as it would be superfluous, rather it can be shown as a class hierarchy, loosely following that of LLVM IR. +It is noteworthy to mention some of the differences here: +- LLVM IR has no register/operand classes, only instructions +- We have no separate metadata classes, e.g. the register class contains the above mentioned DILocalVariable itself in the form of the variable name +- We handle only a fraction of the information contained in LLVM IR, e.g. we only store the `name: "a"` part of the `!DILocalVariable` metadata, even though it contains a scope, line, filename, etc. +![type-classes](type-classes.png) + +### XCFA +At the end of our example execution we get the XCFA from Theta, which can be printed into a Graphviz (dot) format: +![xcfa](xcfa.png) + +Although in the representation before the XCFA we had the registers representing `b`, during the XCFA transformation in Theta the `b = a + 2; main_ret = b` sequence was merged into `main_ret = a + 2`. 
diff --git a/subprojects/frontends/llvm/doc/theta-c-arch.png b/subprojects/frontends/llvm/doc/theta-c-arch.png new file mode 100644 index 0000000000..1214df0835 Binary files /dev/null and b/subprojects/frontends/llvm/doc/theta-c-arch.png differ diff --git a/subprojects/frontends/llvm/doc/type-classes.png b/subprojects/frontends/llvm/doc/type-classes.png new file mode 100644 index 0000000000..5383c01d92 Binary files /dev/null and b/subprojects/frontends/llvm/doc/type-classes.png differ diff --git a/subprojects/frontends/llvm/doc/xcfa.png b/subprojects/frontends/llvm/doc/xcfa.png new file mode 100644 index 0000000000..c72145a3a6 Binary files /dev/null and b/subprojects/frontends/llvm/doc/xcfa.png differ diff --git a/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.cpp b/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.cpp new file mode 100644 index 0000000000..b27908b14e --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.cpp @@ -0,0 +1,465 @@ +#include "hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.h" +#include "types/Module.h" +#include "utilities/CPipeline.h" + +// NOTE: don't use CLions automatic code formatting, it handles this part pretty badly + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniParseIr + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniParseIr(JNIEnv* env, jobject, jstring filename) { + // Convert the JNI String (jstring) into C-String (char*) + const char *cFilename = env->GetStringUTFChars(filename, NULL); + if (NULL == cFilename) { + std::cout << "Could not get filename from jenv!" 
<< std::endl; + return; + } + + // (compile and) parse into llvm Module + std::unique_ptr<llvm::Module> llvmModule; + llvm::SMDiagnostic error; + std::string strFilename(cFilename); + env->ReleaseStringUTFChars(filename, cFilename); // strFilename holds a copy now; releasing here keeps every return path below leak-free + // the last two characters act as the extension; shorter names have none (avoids size_t underflow in substr) + std::string filenameExtension = strFilename.length() >= 2 ? strFilename.substr(strFilename.length() - 2) : std::string(); + + if ( filenameExtension == ".c" || filenameExtension == ".i") { // compare() returns 0 on a match, so it must not be used as a boolean + CPipeline cp = CPipeline(strFilename, "/usr/bin/clang"); + llvmModule = cp.processCProgram(); + } else { + llvmModule = parseIRFile(strFilename, error, context); + } + + if(llvmModule==nullptr) { + std::cout << "Error while parsing: null module!" << std::endl; + return; // TODO somehow communicate the error better + } + + // print the IR into a file - for debugging purposes + if(PassGroupManager::debugPrintIr==true) { + std::error_code EC; + llvm::raw_fd_ostream outIr(strFilename + ".before.ll", EC); + llvmModule->print(outIr, nullptr); + } + + // parse from llvm module to our own module + Module &module = Module::getModule(); + module.parseLLVMModule(move(llvmModule)); + // module.print(); // for debugging purposes + // Register::printLUT(); // for debugging purposes +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableInlining + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableInlining +(JNIEnv*, jobject) { + PassGroupManager::enableInlining = false; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableOptimizationPasses + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableOptimizationPasses +(JNIEnv*, jobject) { + PassGroupManager::enableOptimizations = false; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableCleanupPasses + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableCleanupPasses 
+(JNIEnv*, jobject) { + PassGroupManager::enableCleanupPasses = false; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisablePrintDebugIr + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisablePrintDebugIr +(JNIEnv*, jobject) { + PassGroupManager::debugPrintIr = false; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariablesNum + * Signature: ()I + */ +JNIEXPORT jint + +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariablesNum +(JNIEnv * env, jobject) { + Module &module = Module::getModule(); + return (jint)module.getNumOfGlobalVariables(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableName + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableName +(JNIEnv * env, jobject, jint g) { + Module &module = Module::getModule(); + int globalVarIndex = (int) g; + return env->NewStringUTF((module.getGlobalVariable(globalVarIndex)->getName()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableType + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableType +(JNIEnv * env, jobject, jint g) { + Module &module = Module::getModule(); + int globalVarIndex = (int) g; + return env-> + NewStringUTF((module.getGlobalVariable(globalVarIndex)->getType()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableValue + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableValue +(JNIEnv * env, jobject, jint g) { + Module &module = Module::getModule(); + int globalVarIndex = (int) g; + return 
env->NewStringUTF((module.getGlobalVariable(globalVarIndex)->getInitialValue()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionsNum + * Signature: ()I + */ +JNIEXPORT jint + +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionsNum +(JNIEnv * env, jobject) { + Module &module = Module::getModule(); + return (jint) + module.getNumOfFunctions(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionRetType + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionRetType +(JNIEnv * env, jobject, jint f) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + std::string retType = module.getFunction(functionIndex)->getReturnType(); + return env->NewStringUTF(retType.c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionName + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionName +(JNIEnv * env, jobject, jint f) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + return env->NewStringUTF((module.getFunction(functionIndex)->getName()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfFunctionParameters + * Signature: (I)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfFunctionParameters +(JNIEnv * env, jobject, jint f) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + return (jint)module.getFunction(functionIndex)->getNumOfParameters(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetParameterType + * Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetParameterType +(JNIEnv * env, jobject, jint f, jint p) { + Module &module = 
Module::getModule(); + int functionIndex = (int) f; + int paramIndex = (int) p; + return env->NewStringUTF((module.getFunction(functionIndex)->getParameter(paramIndex)->getType()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetParameterName + * Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetParameterName +(JNIEnv * env, jobject, jint f, jint p) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int paramIndex = (int) p; + return env->NewStringUTF((module.getFunction(functionIndex)->getParameter(paramIndex)->getName()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfBasicBlocks + * Signature: (I)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfBasicBlocks +(JNIEnv * env, jobject, jint f) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + return (jint)module.getFunction(functionIndex)->getNumOfBasicBlocks(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionIndex + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionIndex +(JNIEnv * env, jobject, jstring functionName) { + Module &module = Module::getModule(); + // Convert the JNI String (jstring) into C-String (char*) + const char *cFunctionName = env->GetStringUTFChars(functionName, NULL); + if (NULL == cFunctionName) return -1; // ERROR TODO (communicate somehow better?) 
+ int funcIndex = module.findFunctionByName(cFunctionName); + env->ReleaseStringUTFChars(functionName, cFunctionName); // release string resources + + return (jint) + funcIndex; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBlockName + * Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBlockName +(JNIEnv * env, jobject, jint f, jint b) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int basicBlockIndex = (int) b; + return env->NewStringUTF((module.getFunction(functionIndex)->getBasicBlock(basicBlockIndex)->getName()).c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBlockIndex + * Signature: (ILjava/lang/String;)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBlockIndex +(JNIEnv * env, jobject, jint f, jstring basicBlockName) { + Module &module = Module::getModule(); + + // Convert the JNI String (jstring) into C-String (char*) + const char *cBasicBlockName = env->GetStringUTFChars(basicBlockName, NULL); + if (NULL == cBasicBlockName) return -1; // ERROR TODO (communicate somehow better?) 
+ + int basicBlockIndex = module.getFunction(f)->findBasicBlockByName(cBasicBlockName); + + env->ReleaseStringUTFChars(basicBlockName, cBasicBlockName); // release string resources + + return (jint) + basicBlockIndex; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfInstructions + * Signature: (II)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfInstructions +(JNIEnv * env, jobject, jint f, jint b) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + return (jint)module.getFunction(functionIndex)->getBasicBlock(bbIndex)-> + + getNumOfInstructions(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionLineNumber + * Signature: (III)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionLineNumber +(JNIEnv * env, jobject, jint f, jint b, jint i) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + return (jint)module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getLineNumber(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOpcode + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOpcode +(JNIEnv * env, jobject, jint f, jint b, jint i) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + std::string opName = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getOpname(); + return env->NewStringUTF( opName.c_str() ); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionRetType + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL 
Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionRetType +(JNIEnv * env, jobject, jint f, jint b, jint i) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + std::shared_ptr reg = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getRetVariable(); + std::string retType; + if ( reg ) retType = reg->getType(); + else { + retType = ""; + } + return env->NewStringUTF( retType.c_str() ); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionRetName + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionRetName +(JNIEnv * env, jobject, jint f, jint b, jint i) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + std::shared_ptr reg = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getRetVariable(); + std::string retName; + if ( reg ) { + retName = reg->getName(); + } else { + retName = ""; + } + return env->NewStringUTF( retName.c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionNumOfOperands + * Signature: (III)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionNumOfOperands +(JNIEnv * env, jobject, jint f, jint b, jint i) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + int numOfOperands = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getNumOfOperands(); + return (jint)numOfOperands; +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOperandVarType + * Signature: (IIII)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL 
Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOperandVarType +(JNIEnv * env, jobject, jint f, jint b, jint i, jint o) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + int operandIndex = (int) o; + std::string operandType; + + // only registers will return a proper type, all others return constant, this is then handled on the Java side in the SSAProvider + operandType = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getOperand(operandIndex)->getType(); + return env->NewStringUTF( operandType.c_str() ); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOperandVarName + * Signature: (IIII)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOperandVarName +(JNIEnv * env, jobject, jint f, jint b, jint i, jint o ) { + Module &module = Module::getModule(); + int functionIndex = (int) f; + int bbIndex = (int) b; + int instIndex = (int) i; + int operandIndex = (int) o; + std::string operandName = module.getFunction(functionIndex)->getBasicBlock(bbIndex)->getInstruction(instIndex)->getOperand(operandIndex)->getName(); + return env->NewStringUTF( operandName.c_str()); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetStructAnalysisResult + * Signature: ()I + */ +JNIEXPORT jboolean +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetStructAnalysisResult +(JNIEnv * env, jobject) { + return (jboolean)Analysis::getStructAnalysisResult(); +} + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBitwiseArithmeticAnalysisResult + * Signature: ()I + */ +JNIEXPORT jboolean +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBitwiseArithmeticAnalysisResult +(JNIEnv * env, jobject) { + return (jboolean)Analysis::getBitwiseOpAnalysisResult(); +} diff --git 
a/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.h b/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.h new file mode 100644 index 0000000000..983f51bcaa --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider.h @@ -0,0 +1,279 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider */ + +#ifndef _Included_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider +#define _Included_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniParseIr + * Signature: (Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniParseIr +(JNIEnv +*, jobject, jstring); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableInlining + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableInlining +(JNIEnv +*, jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableOptimizationPasses + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableOptimizationPasses +(JNIEnv +*, jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisableCleanupPasses + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisableCleanupPasses +(JNIEnv +*, jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniDisablePrintDebugIr + * Signature: ()V + */ +JNIEXPORT void JNICALL +Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniDisablePrintDebugIr +(JNIEnv +*, jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariablesNum + * Signature: ()I + */ +JNIEXPORT jint +JNICALL 
Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariablesNum + (JNIEnv * , jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableName + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableName + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableType + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableType + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetGlobalVariableValue + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetGlobalVariableValue + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionsNum + * Signature: ()I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionsNum + (JNIEnv * , jobject); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionRetType + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionRetType + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionName + * Signature: (I)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionName + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfFunctionParameters + * Signature: (I)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfFunctionParameters + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetParameterType + 
* Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetParameterType + (JNIEnv * , jobject, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetParameterName + * Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetParameterName + (JNIEnv * , jobject, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfBasicBlocks + * Signature: (I)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfBasicBlocks + (JNIEnv * , jobject, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetFunctionIndex + * Signature: (Ljava/lang/String;)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetFunctionIndex + (JNIEnv * , jobject, jstring); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBlockName + * Signature: (II)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBlockName + (JNIEnv * , jobject, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBlockIndex + * Signature: (ILjava/lang/String;)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBlockIndex + (JNIEnv * , jobject, jint, jstring); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetNumOfInstructions + * Signature: (II)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetNumOfInstructions + (JNIEnv * , jobject, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionLineNumber + * Signature: (III)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionLineNumber + (JNIEnv * , jobject, jint, jint, jint); + +/* + * Class: 
hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOpcode + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOpcode + (JNIEnv * , jobject, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionRetType + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionRetType + (JNIEnv * , jobject, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionRetName + * Signature: (III)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionRetName + (JNIEnv * , jobject, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionNumOfOperands + * Signature: (III)I + */ +JNIEXPORT jint +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionNumOfOperands + (JNIEnv * , jobject, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOperandVarType + * Signature: (IIII)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOperandVarType + (JNIEnv * , jobject, jint, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetInstructionOperandVarName + * Signature: (IIII)Ljava/lang/String; + */ +JNIEXPORT jstring +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetInstructionOperandVarName + (JNIEnv * , jobject, jint, jint, jint, jint); + +/* + * Class: hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetStructAnalysisResult + * Signature: ()I + */ +JNIEXPORT jboolean +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetStructAnalysisResult + (JNIEnv * , jobject); + +/* + * Class: 
hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider + * Method: JniGetBitwiseArithmeticAnalysisResult + * Signature: ()I + */ +JNIEXPORT jboolean +JNICALL Java_hu_bme_mit_theta_llvm2xcfa_LlvmIrProvider_JniGetBitwiseArithmeticAnalysisResult + (JNIEnv * , jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.cpp new file mode 100644 index 0000000000..1e887b89d4 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.cpp @@ -0,0 +1,61 @@ +// +// Created by solarowl on 4/27/21. +// + +#include "BranchDbgCallPass.h" + +bool BranchDbgCallPass::runOnModule(llvm::Module &M) { + std::vector < llvm::Type * > funArg = {llvm::Type::getVoidTy(M.getContext())}; + + + /* + llvm::FunctionType *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(M.getContext()), funArg, false); + brDbgFunction = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, + "theta.dbg.control", M); + */ + + llvm::FunctionType *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(M.getContext()), false); + brDbgFunction = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, + "theta.dbg.control", M); + + brDbgFunction->addFnAttr(llvm::Attribute::AttrKind::WillReturn); + brDbgFunction->addFnAttr(llvm::Attribute::AttrKind::NoUnwind); + brDbgFunction->addFnAttr(llvm::Attribute::AttrKind::NoDuplicate); + // brDbgFunction->addFnAttr(llvm::Attribute::AttrKind::ReadNone); + + auto &llvmFunctionList = M.getFunctionList(); + for (llvm::Function &llvmFunction : llvmFunctionList) { + if (!llvmFunction.isDeclaration()) { + iterateOnFunction(llvmFunction); + } + } + + return false; +} + +void BranchDbgCallPass::handleTerminatorInst(llvm::BasicBlock &bb) { + llvm::Instruction *term = bb.getTerminator(); + llvm::BranchInst *br; + if((br=llvm::dyn_cast(term))!=nullptr) { + // create and insert dbg call + llvm::CallInst::Create( + 
brDbgFunction->getFunctionType(), + brDbgFunction, + // br->getOperand(0), + "", // nameStr + br // insertBefore + ); + } + +} + +void BranchDbgCallPass::iterateOnFunction(llvm::Function &F) { + // iterate through basic blocks + llvm::BasicBlock *bb = &F.getEntryBlock(); + while (bb) { + handleTerminatorInst(*bb); + bb = bb->getNextNode(); + } +} + +llvm::Pass *createBranchDbgCallPass() { return new BranchDbgCallPass; } \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.h b/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.h new file mode 100644 index 0000000000..4ce0a4ba94 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/BranchDbgCallPass.h @@ -0,0 +1,27 @@ +// +// Created by solarowl on 4/27/21. +// + +#ifndef THETA_C_FRONTEND_BRANCHDBGCALLPASS_H +#define THETA_C_FRONTEND_BRANCHDBGCALLPASS_H + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +class BranchDbgCallPass : public llvm::ModulePass { +private: + llvm::Function *brDbgFunction; + void handleTerminatorInst(llvm::BasicBlock &bb); + void iterateOnFunction(llvm::Function &F); +public: + char ID; + + BranchDbgCallPass() : llvm::ModulePass(ID) {} + + bool runOnModule(llvm::Module &M); +}; + +llvm::Pass *createBranchDbgCallPass(); + +#endif //THETA_C_FRONTEND_BRANCHDBGCALLPASS_H diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.cpp new file mode 100644 index 0000000000..d1aaa906c0 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.cpp @@ -0,0 +1,262 @@ +// +// Created by solarowl on 4/5/21. 
+// + +#include "EliminateGepPass.h" +#include + +// This pass tries to eliminate load/store+gep combinations and changes them to function calls +// some more special cases are also handled (2-3 long chain of geps, bitcast between load/store and gep), but not all of them +bool EliminateGepPass::checkGep(llvm::Instruction* gep) { + if(gep == nullptr) return false; // not a gep, if nullptr + + if(gep->getNumOperands()!=3 && gep->getNumOperands()!=2) { // it's a gep, but can we handle it? + std::cerr << "WARNING: Unhandled gep structure, not eliminating instruction" << std::endl; + std::cerr << "GEP Instruction: " << std::endl; + gep->print(llvm::errs()); + std::cerr << std::endl; + /* + std::cerr << std::endl << "Operands:" << std::endl; + for(int i = 0; i < gep->getNumOperands(); i++) { + std::cout << i << ": " << std::endl; + gep->getOperand(i)->print(llvm::errs()); + std::cout << std::endl; + } + */ + return false; + } else { + return true; + } +} + +// replaces the given instruction with a call, then returns the newly inserted call instruction +// (which instruction should get as a value, as the old instruction will no longer be valid!) 
+llvm::Instruction* EliminateGepPass::insertGetArrayElementCall(llvm::Instruction *instruction, llvm::Instruction *gep) { + llvm::BasicBlock &bb = *instruction->getParent(); + llvm::Value *operand2; + if(gep->getNumOperands()==3) { // index operands in the form: 0, operand2 + operand2 = gep->getOperand(2); + } else if(gep->getNumOperands()==2) { // index operands in the form: operand2 (in case of a malloc) + operand2 = gep->getOperand(1); + } + + std::vector < llvm::Value * > operands = { + gep->getOperand(0), // array + operand2 // idx + }; + + llvm::Function *getArrayElementFunction = fetchGetArrayElementFunction(instruction->getType(), + { gep->getOperand(0)->getType(), + operand2->getType() }, + *instruction->getModule()); + llvm::Instruction *ci = llvm::CallInst::Create( + getArrayElementFunction->getFunctionType(), + getArrayElementFunction, + operands, + "" + ); + llvm::BasicBlock::iterator ii(instruction); + llvm::ReplaceInstWithInst(bb.getInstList(), ii, ci); + instruction = ci; + removable.insert(gep); + return instruction; +} + +// replaces the given instruction with a call, then returns the newly inserted call instruction +// (which instruction should get as a value, as the old instruction will no longer be valid!) 
+llvm::Instruction* EliminateGepPass::insertSetArrayElementCall(llvm::Instruction *instruction, llvm::Instruction *gep) { + llvm::BasicBlock &bb = *instruction->getParent(); + + llvm::Value *operand2; + if(gep->getNumOperands()==3) { // index operands in the form: 0, operand2 + operand2 = gep->getOperand(2); + } else if(gep->getNumOperands()==2) { // index operands in the form: operand2 (in case of a malloc) + operand2 = gep->getOperand(1); + } + + std::vector < llvm::Value * > operands = { + gep->getOperand(0), // array + operand2, // idx + instruction->getOperand(0) // value to set + }; + + llvm::Function *setArrayElementFunction = fetchSetArrayElementFunction( { gep->getOperand(0)->getType(), + operand2->getType(), + instruction->getOperand(0)->getType() + }, + *instruction->getModule()); + llvm::Instruction *ci = llvm::CallInst::Create( + setArrayElementFunction->getFunctionType(), + setArrayElementFunction, + operands, + "" + ); + + llvm::BasicBlock::iterator ii(instruction); + llvm::ReplaceInstWithInst(bb.getInstList(), ii, ci); + instruction = ci; + removable.insert(gep); + return instruction; +} + +// (instruction should get the return value as a value, as the old instruction will no longer be valid!) 
+llvm::Instruction* EliminateGepPass::handleLoad(llvm::Instruction *instruction) { + llvm::Instruction *gep = llvm::dyn_cast(instruction->getOperand(0)); + + if(checkGep(gep)) { + // there can be a chain of geps, we'll handle double and triple here for now + + /* // TODO debug gep chaings - they cause badrefs + llvm::Instruction *gep2 = llvm::dyn_cast(gep->getOperand(0)); + if(checkGep(gep2)) { + + llvm::Instruction *gep3 = llvm::dyn_cast(gep2->getOperand(0)); + if(checkGep(gep3)) { + gep2 = insertGetArrayElementCall(gep2, gep3); + } + + gep = insertGetArrayElementCall(gep, gep2); + } + */ + instruction = insertGetArrayElementCall(instruction, gep); + } else { // we'll just not handle gep chain+bitcast combinations for now - I'm not even sure, if they exist + if(gep==nullptr) { + // handle if there is a bitcast inbetween the load and the gep + llvm::Instruction *bitcast = llvm::dyn_cast(instruction->getOperand(0)); + if(bitcast != nullptr) { + gep = llvm::dyn_cast(bitcast->getOperand(0)); + if(checkGep(gep)) { // we got a gep and we can handle its structure + gep->getOperand(0)->mutateType(bitcast->getType()); + instruction = insertGetArrayElementCall(instruction, gep); + removable.insert(bitcast); + } + } + } + } + + return instruction; +} + +// (instruction should get the return value as a value, as the old instruction will no longer be valid!) 
+llvm::Instruction* EliminateGepPass::handleStore(llvm::Instruction *instruction) { + llvm::Instruction *gep = llvm::dyn_cast(instruction->getOperand(1)); + + if (checkGep(gep)) { + // there can be a chain of geps, we'll handle double and triple here for now + /* // TODO debug gep chains - they cause badrefs + llvm::Instruction *gep2 = llvm::dyn_cast(gep->getOperand(0)); + if(checkGep(gep2)) { + + llvm::Instruction *gep3 = llvm::dyn_cast(gep2->getOperand(0)); + if(checkGep(gep3)) { + gep2 = insertSetArrayElementCall(gep2, gep3); + } + + gep = insertSetArrayElementCall(gep, gep2); + } + */ + + instruction = insertSetArrayElementCall(instruction, gep); + } else { + llvm::Instruction *bitcast = llvm::dyn_cast(instruction->getOperand(0)); + + if(bitcast != nullptr) { + gep = llvm::dyn_cast(bitcast->getOperand(0)); // handle, if there is a bitcast inbetween the load and the gep + if(checkGep(gep)) { + gep->getOperand(0)->mutateType(bitcast->getType()); + instruction = insertSetArrayElementCall(instruction, gep); + removable.insert(bitcast); + } + } + } + + return instruction; +} + +llvm::Function *EliminateGepPass::fetchGetArrayElementFunction(llvm::Type *retType, std::array paramType, llvm::Module &M) { + std::vector < llvm::Type * > funArg = {paramType[0], + paramType[1]}; + llvm::FunctionType *FT = llvm::FunctionType::get(retType, funArg, false); + + auto getFunc = getElementFunctions.find(FT); + if (getFunc == getElementFunctions.end()) { // this function does not exist yet, we'll need to add it + std::string typeName; + llvm::raw_string_ostream typeNameStream(typeName); + retType->print(typeNameStream); + typeNameStream.str(); + + llvm::Function *F = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, + "theta.dbg.getArrayElement_" + std::to_string(getElementFunctions.size()), M); + getElementFunctions[FT] = F; + return F; + } else { + return getFunc->second; + } +} + +llvm::Function *EliminateGepPass::fetchSetArrayElementFunction(std::array paramType, 
llvm::Module &M) { + std::vector < llvm::Type * > funArg = {paramType[0], paramType[1], paramType[2] }; + llvm::FunctionType *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(M.getContext()), funArg, false); + auto setFunc = setElementFunctions.find(FT); + + if (setFunc == setElementFunctions.end()) { // this function does not exist yet, we'll need to add it + // create and add setArrayElement function + llvm::Function *F = llvm::Function::Create(FT, llvm::Function::ExternalLinkage, + "theta.dbg.setArrayElement_" + std::to_string(setElementFunctions.size()), M); + + setElementFunctions[FT] = F; + return F; + } else { + return setFunc->second; + } +} + +void EliminateGepPass::iterateOnBasicBlock(llvm::BasicBlock &bb) { + llvm::Instruction *instruction = bb.getFirstNonPHI(); + while (instruction != nullptr) { + if ((std::string) instruction->getOpcodeName() == "load") { + instruction = handleLoad(instruction); + + } else if ((std::string) instruction->getOpcodeName() == "store") { + instruction = handleStore(instruction); + } + instruction = instruction->getNextNonDebugInstruction(); + } + +} + +void EliminateGepPass::iterateOnFunction(llvm::Function &F) { + // iterate through basic blocks + llvm::BasicBlock *bb = &F.getEntryBlock(); + while (bb) { + iterateOnBasicBlock(*bb); + bb = bb->getNextNode(); + } +} + +bool EliminateGepPass::runOnModule(llvm::Module &M) { + // initialize getArrayElement func map + getElementFunctions = std::unordered_map(); + setElementFunctions = std::unordered_map(); + + removable = std::set(); // initialize set + + auto &llvmFunctionList = M.getFunctionList(); + for (llvm::Function &llvmFunction : llvmFunctionList) { + if (!llvmFunction.isDeclaration()) { + iterateOnFunction(llvmFunction); + } + } + + + for (llvm::Instruction *r : removable) { + if (r != nullptr && r->getParent() != nullptr) { + r->eraseFromParent(); + } + } + + return false; +} + + +llvm::Pass *createEliminateGepPass() { return new EliminateGepPass; } \ No newline at 
end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.h b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.h new file mode 100644 index 0000000000..c9b9db2c90 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateGepPass.h @@ -0,0 +1,58 @@ +// +// Created by solarowl on 4/5/21. +// + +#ifndef JNI_LIBRARY_ELIMINATEGEPPASS_H +#define JNI_LIBRARY_ELIMINATEGEPPASS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/InstrTypes.h" + +#include +#include +#include +#include + +class EliminateGepPass : public llvm::ModulePass { +private: + bool checkGep(llvm::Instruction* gep); + llvm::Instruction* handleLoad(llvm::Instruction *instruction); + llvm::Instruction* handleStore(llvm::Instruction *instruction); + + llvm::Instruction* insertGetArrayElementCall(llvm::Instruction *instruction, llvm::Instruction *gep); + llvm::Instruction* insertSetArrayElementCall(llvm::Instruction *instruction, llvm::Instruction *gep); + + void iterateOnBasicBlock(llvm::BasicBlock &bb); + void iterateOnFunction(llvm::Function &F); + + // llvm::Function *setF = nullptr; + // The return type has to be right and the parameter types should be right, so we may need more than one set/getArrayElement functions + std::unordered_map getElementFunctions; + std::unordered_map setElementFunctions; + + llvm::Function *fetchGetArrayElementFunction(llvm::Type *retType, std::array paramType, llvm::Module &M); + llvm::Function *fetchSetArrayElementFunction(std::array paramType, llvm::Module &M); + + std::set removable; +public: + char ID; + + EliminateGepPass() : 
llvm::ModulePass(ID) {} + + bool runOnModule(llvm::Module &M); +}; + +llvm::Pass *createEliminateGepPass(); + +#endif //JNI_LIBRARY_ELIMINATEGEPPASS_H diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.cpp new file mode 100644 index 0000000000..13e4953587 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.cpp @@ -0,0 +1,67 @@ +#include "EliminatePhiNodes.h" +#include +#include +#include +#include +#include + +/* + * Eliminates phi nodes where it makes sense (no non-dominated predecessor block) + */ +bool EliminatePhiNodes::runOnFunction(llvm::Function &F) +{ + bool first = true; + llvm::AllocaInst* alloca; + llvm::DominatorTree domTree{F}; + llvm::IntegerType *Ty = llvm::IntegerType::get(F.getContext(), 64); + std::set alreadyStoringPointer; + for (llvm::Function::iterator FI = F.begin(), E = F.end(); FI != E; ++FI) + { + for (llvm::Instruction* I = &*(FI->begin()); I != nullptr; I = I->getNextNonDebugInstruction()) + { + if(first) { + + alloca = new llvm::AllocaInst(Ty, 0, (std::string)"lastblock", &*I); + first = false; + } + llvm::PHINode* phi; + if ((phi = llvm::dyn_cast(I))) { + unsigned num = phi->getNumIncomingValues(); + llvm::Value* val1 = phi->getIncomingValue(0); + llvm::Value* val2 = phi->getIncomingValue(1); + llvm::Instruction* inst1, *inst2; + inst1 = llvm::dyn_cast(val1); + inst2 = llvm::dyn_cast(val2); + if(num == 2 && ((inst1==nullptr) || domTree.dominates(inst1, phi)) && ((inst2==nullptr) || domTree.dominates(inst2, phi))) { + llvm::BasicBlock* incoming = phi->getIncomingBlock(0); + llvm::LoadInst* load = new llvm::LoadInst(Ty, alloca, (std::string)"lastblock", &*I); + llvm::ConstantInt* constant = llvm::ConstantInt::get(Ty, (long long)incoming); + llvm::ICmpInst* icmp = new llvm::ICmpInst(&*I, llvm::CmpInst::Predicate::ICMP_EQ, load, constant); + llvm::SelectInst* select = llvm::SelectInst::Create(icmp, 
phi->getIncomingValue(0), phi->getIncomingValue(1), (std::string)"lastblock"); + + if(alreadyStoringPointer.find(incoming)==alreadyStoringPointer.end()) { + new llvm::StoreInst(llvm::ConstantInt::get(Ty, (long long)incoming), alloca, incoming->getTerminator()); + alreadyStoringPointer.insert(incoming); + } + llvm::BasicBlock* otherIncoming = phi->getIncomingBlock(1); + if(alreadyStoringPointer.find(otherIncoming)==alreadyStoringPointer.end()) { + new llvm::StoreInst(llvm::ConstantInt::get(Ty, (long long)otherIncoming), alloca, otherIncoming->getTerminator()); + alreadyStoringPointer.insert(otherIncoming); + } + + llvm::ReplaceInstWithInst(phi, select); + I = select; + } else if(phi != &*(FI->begin())){ + I = phi->getPrevNode(); + phi->moveBefore(&*(FI->begin())); + } + } + } + + } + return false; +} + +llvm::Pass *createPhiEliminationPass() { + return new EliminatePhiNodes(); +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.h b/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.h new file mode 100644 index 0000000000..2c760ea945 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminatePhiNodes.h @@ -0,0 +1,32 @@ +#ifndef JNI_LIBRARY_ELIMINATEPHINODES_H +#define JNI_LIBRARY_ELIMINATEPHINODES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +class EliminatePhiNodes : public llvm::FunctionPass { + char ID; +public: + EliminatePhiNodes() + : llvm::FunctionPass(ID) {} + + virtual bool runOnFunction(llvm::Function &f); +}; + +llvm::Pass *createPhiEliminationPass(); + 
+#endif \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.cpp new file mode 100644 index 0000000000..6a204a6830 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.cpp @@ -0,0 +1,25 @@ +#include "EliminateVariables.h" + +/* + * Eliminates call statements' unused return values. + * Such functions must not have a name, otherwise verification fails. + */ +bool EliminateVariables::runOnFunction(llvm::Function &F) +{ + for (llvm::Function::iterator FI = F.begin(), E = F.end(); FI != E; ++FI) + { + for (llvm::BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E; ++I) + { + llvm::CallInst* call; + if ((call = llvm::dyn_cast(I)) && I->user_begin() == I->user_end() && call->getType() != llvm::Type::getVoidTy(F.getContext())) { + call->mutateType(llvm::Type::getVoidTy(F.getContext())); + call->setValueName(nullptr); + } + } + } + return false; +} + +llvm::Pass *createEliminationPass() { + return new EliminateVariables(); +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.h b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.h new file mode 100644 index 0000000000..4fbbd96286 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/EliminateVariables.h @@ -0,0 +1,32 @@ +#ifndef JNI_LIBRARY_ELIMINATEVARIABLES_H +#define JNI_LIBRARY_ELIMINATEVARIABLES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +class 
EliminateVariables : public llvm::FunctionPass { + char ID; +public: + EliminateVariables() + : llvm::FunctionPass(ID) {} + + virtual bool runOnFunction(llvm::Function &f); +}; + +llvm::Pass *createEliminationPass(); + +#endif \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.cpp new file mode 100644 index 0000000000..0cce7a1c4b --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.cpp @@ -0,0 +1,35 @@ +// +// Created by solarowl on 4/5/21. +// + +#include "ToposortPass.h" + +bool ToposortPass::runOnFunction(llvm::Function &f) { + for (llvm::scc_iterator < llvm::Function * > i = scc_begin(&f); i != scc_end(&f); ++i) { + const std::vector stronglyConnectedBbs = *i; + for (std::vector::const_iterator j = stronglyConnectedBbs.begin(); + j != stronglyConnectedBbs.end(); ++j) { + int lastpos = -1; + for (auto k = pred_begin(*j), kend = pred_end(*j); k != kend; ++k) { + std::vector::const_iterator it; + if ((it = std::find(stronglyConnectedBbs.begin(), stronglyConnectedBbs.end(), *k)) != + stronglyConnectedBbs.end()) { + if (it - stronglyConnectedBbs.begin() > lastpos) { + lastpos = it - stronglyConnectedBbs.begin(); + } + } else { + lastpos = -1; + break; + } + } + if (lastpos != -1) { + (*j)->moveAfter(stronglyConnectedBbs.at(lastpos)); + } + } + } + return false; +} + +llvm::Pass *createToposortPass() { + return new ToposortPass(); +} diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.h b/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.h new file mode 100644 index 0000000000..5b36f28473 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/ToposortPass.h @@ -0,0 +1,42 @@ +// +// Created by solarowl on 4/5/21. 
+// + +#ifndef JNI_LIBRARY_TOPOSORTPASS_H +#define JNI_LIBRARY_TOPOSORTPASS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +// Sorts the blocks in strongly connected components based on topological order. +// Basic block loops need to have an entry block (to the loop), but if this block isn't +// the first in the IR, we have a potential problem: when iterating through the blocks, +// we may find a register on the right-hand side of a (non-phi) instruction that isn't assigned yet. +// (Of course we won't run into this when running the IR, but it's problematic when we are transforming it into an XCFA.) +// This pass eliminates that possibility. +class ToposortPass : public llvm::FunctionPass { + char ID; +public: + ToposortPass() + : llvm::FunctionPass(ID) {} + + virtual bool runOnFunction(llvm::Function &f); +}; + +llvm::Pass *createToposortPass(); + +#endif //JNI_LIBRARY_TOPOSORTPASS_H diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.cpp new file mode 100644 index 0000000000..3c209de22f --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.cpp @@ -0,0 +1,79 @@ +// +// Created by solarowl on 5/10/21.
+// + +#include "TransformHandlesToIntPass.h" +#include + +bool TransformHandlesToIntPass::runOnModule(llvm::Module &M) { + if(M.getFunction("pthread_create")==nullptr) { + return false; + } // no pthread_create in the program + + llvm::Function * pthreadCreate = M.getFunction("pthread_create"); + std::vector pthreadCreateParams; + for(llvm::Type *type : pthreadCreate->getFunctionType()->params()) { + pthreadCreateParams.push_back(type); + } + pthreadCreateParams.erase(pthreadCreateParams.begin()+1); + + // pthread_create has 4 parameters; the second was erased above, so theta_pthread_create takes the remaining three (NOTE(review): no parameter is changed to an int here) + llvm::FunctionType *thetaCreateThreadType = llvm::FunctionType::get(pthreadCreate->getReturnType(), + pthreadCreateParams, + false); + + thetaCreateThreadFunction = llvm::Function::Create(thetaCreateThreadType, llvm::Function::ExternalLinkage, + "theta_pthread_create", M); + + auto &llvmFunctionList = M.getFunctionList(); + for (llvm::Function &llvmFunction : llvmFunctionList) { + if (!llvmFunction.isDeclaration()) { + iterateOnFunction(llvmFunction); + } + } + return false; +} + + +void TransformHandlesToIntPass::iterateOnBasicBlock(llvm::BasicBlock &bb) { + llvm::Instruction *instruction = bb.getFirstNonPHI(); + while (instruction != nullptr) { + if (instruction->getOpcode() == llvm::Instruction::Call && + instruction->getOperand(instruction->getNumOperands() - 1)->getName().str() == "pthread_create") { + instruction = handlePthreadCreate(instruction); + } + instruction = instruction->getNextNonDebugInstruction(); + } + +} + +void TransformHandlesToIntPass::iterateOnFunction(llvm::Function &F) { + // iterate through basic blocks + llvm::BasicBlock *bb = &F.getEntryBlock(); + while (bb) { + iterateOnBasicBlock(*bb); + bb = bb->getNextNode(); + } +} + +llvm::Instruction* TransformHandlesToIntPass::handlePthreadCreate(llvm::Instruction *instruction) { + llvm::BasicBlock &bb = *instruction->getParent(); + std::vector < llvm::Value * > operands = { + instruction->getOperand(0), +
instruction->getOperand(2), + instruction->getOperand(3) + }; + + llvm::Instruction *ci = llvm::CallInst::Create( + thetaCreateThreadFunction->getFunctionType(), + thetaCreateThreadFunction, + operands, + "" + ); + llvm::BasicBlock::iterator ii(instruction); + llvm::ReplaceInstWithInst(bb.getInstList(), ii, ci); + instruction = ci; + return instruction; +} + +llvm::Pass *createTransformHandlesToIntPass() { return new TransformHandlesToIntPass; } diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.h b/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.h new file mode 100644 index 0000000000..39dcdcc6b8 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/TransformHandlesToIntPass.h @@ -0,0 +1,47 @@ +// +// Created by solarowl on 5/10/21. +// + +#ifndef THETA_C_FRONTEND_TRANSFORMHANDLESTOINT_H +#define THETA_C_FRONTEND_TRANSFORMHANDLESTOINT_H + +// TODO superfluous imports +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Pass.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/InstrTypes.h" +#include +#include + +class TransformHandlesToIntPass : public llvm::ModulePass { +private: + llvm::Function * thetaCreateThreadFunction; + + llvm::FunctionType *mutatedType; + llvm::Instruction* handlePthreadCreate(llvm::Instruction *instruction); + + void iterateOnBasicBlock(llvm::BasicBlock &bb); + void iterateOnFunction(llvm::Function &F); +public: + char ID; + + TransformHandlesToIntPass() : llvm::ModulePass(ID) {} + + bool runOnModule(llvm::Module &M); + +}; + +llvm::Pass *createTransformHandlesToIntPass(); + +// pthread_create + +#endif //THETA_C_FRONTEND_TRANSFORMHANDLESTOINT_H diff --git 
a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Inline.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Inline.cpp new file mode 100644 index 0000000000..9e1d64edaf --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Inline.cpp @@ -0,0 +1,151 @@ +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements our own inlining pass, which is more restricted, +/// but faster than the inlining utilities found in LLVM. 
+// +//===----------------------------------------------------------------------===// + +#include "TransformUtils.h" + +#include "Passes.h" + +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "SimplifiedInliner" + +using namespace gazer; + +namespace { + + class InlinePass : public llvm::ModulePass { + public: + static char ID; + + public: + InlinePass(llvm::Function *entry, InlineLevel level) + : ModulePass(ID), mEntryFunction(entry), mLevel(level) { + assert(mEntryFunction != nullptr); + } + + void getAnalysisUsage(llvm::AnalysisUsage &au) const override { + au.addRequired(); + } + + bool runOnModule(llvm::Module &module) override; + + llvm::StringRef getPassName() const override { + return "Simplified inling"; + } + + private: + bool shouldInlineFunction(llvm::CallGraphNode *target, unsigned allowedRefs); + + llvm::Function *mEntryFunction; + InlineLevel mLevel; + }; + +} // end anonymous namespace + +char InlinePass::ID; + +bool InlinePass::shouldInlineFunction(llvm::CallGraphNode *target, unsigned allowedRefs) { + bool viable = llvm::isInlineViable(*target->getFunction()).isSuccess(); + viable |= !isRecursive(target); + + if (target->getFunction()->getName() == "reach_error") { + return false; + } + + if (!viable) { + return false; + } + + if (mLevel == InlineLevel::All) { + // This setting requires inlining all non-recursive viable calls. + return true; + } + + // On the default setting we only want to inline functions which are + // non-recursive, used only once and do not have variable argument lists. + if (target->getFunction()->isVarArg()) { + return false; + } + + // If the target has fewer references than the threshold, inline it. + if (target->getNumReferences() <= allowedRefs) { + return true; + } + + // Check the function if it is small enough the inline it below the threshold. 
+ + return false; +} + +bool InlinePass::runOnModule(llvm::Module &module) { + if (mLevel == InlineLevel::Off) { + return false; + } + + bool changed = false; + llvm::CallGraph &cg = getAnalysis().getCallGraph(); + + llvm::InlineFunctionInfo ifi(&cg); + llvm::SmallVector < llvm::CallBase * , 16 > wl; + + llvm::CallGraphNode *entryCG = cg[mEntryFunction]; + + for (auto &tup : *entryCG) { + auto &call = tup.first; + auto &target = tup.second; + if (this->shouldInlineFunction(target, 1)) { + LLVM_DEBUG(llvm::dbgs() << "Decided to inline call " << *call << " to target " + << target->getFunction()->getName() << "\n"); + wl.emplace_back(llvm::dyn_cast(*call)); + } + } + + while (!wl.empty()) { + llvm::CallBase *cs = wl.pop_back_val(); + bool success = llvm::InlineFunction(*cs, ifi).isSuccess(); + changed |= success; + + for (llvm::Value *newCall : ifi.InlinedCalls) { + llvm::CallBase *newCS = llvm::dyn_cast(newCall); + auto callee = newCS->getCalledFunction(); + if (callee == nullptr) { + continue; + } + + llvm::CallGraphNode *calleeNode = cg[callee]; + if (this->shouldInlineFunction(calleeNode, 2)) { + wl.emplace_back(newCS); + } + } + } + + return changed; +} + +llvm::Pass *gazer::createSimpleInlinerPass(llvm::Function &entry, InlineLevel level) { + return new InlinePass(&entry, level); +} diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.cpp new file mode 100644 index 0000000000..dfc92d087a --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.cpp @@ -0,0 +1,127 @@ +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// + +#include "Intrinsics.h" + +#include + +using namespace gazer; + +static std::string getOverloadedFunctionName(llvm::StringRef prefix, llvm::Type *type) { + assert(type != nullptr); + + std::string nameBuffer; + llvm::raw_string_ostream rso(nameBuffer); + rso << prefix; + type->print(rso, false, true); + rso.flush(); + + return rso.str(); +} + +/* +llvm::FunctionCallee GazerIntrinsic::GetOrInsertFunctionEntry(llvm::Module& module, llvm::ArrayRef args) +{ + std::vector funArgs; + funArgs.push_back(llvm::Type::getMetadataTy(module.getContext())); + funArgs.insert(funArgs.end(), args.begin(), args.end()); + + auto funTy = llvm::FunctionType::get(llvm::Type::getVoidTy(module.getContext()), funArgs, false); + + std::string buffer; + llvm::raw_string_ostream rso{buffer}; + + rso << FunctionEntryPrefix; + for (auto& arg : args) { + rso << '.'; + arg->print(rso, false, true); + } + rso.flush(); + + return module.getOrInsertFunction( + rso.str(), + funTy + ); +} + +llvm::FunctionCallee GazerIntrinsic::GetOrInsertFunctionReturnVoid(llvm::Module& module) +{ + return module.getOrInsertFunction( + FunctionReturnVoidName, + llvm::Type::getVoidTy(module.getContext()), + llvm::Type::getMetadataTy(module.getContext()) + ); +} + +llvm::FunctionCallee GazerIntrinsic::GetOrInsertFunctionCallReturned(llvm::Module& module) +{ + return module.getOrInsertFunction( + FunctionCallReturnedName, + llvm::Type::getVoidTy(module.getContext()), + llvm::Type::getMetadataTy(module.getContext()) 
+ ); +} + +llvm::FunctionCallee GazerIntrinsic::GetOrInsertFunctionReturnValue(llvm::Module& module, llvm::Type* type) +{ + // Insert a new function for this mark type + return module.getOrInsertFunction( + getOverloadedFunctionName(FunctionReturnValuePrefix, type), + llvm::Type::getVoidTy(module.getContext()), + llvm::Type::getMetadataTy(module.getContext()), + type + ); +} +*/ +llvm::FunctionCallee GazerIntrinsic::GetOrInsertInlinedGlobalWrite(llvm::Module &module, llvm::Type *type) { + return module.getOrInsertFunction( + getOverloadedFunctionName("gazer.inlined_global.write.", type), + // getOverloadedFunctionName(InlinedGlobalWritePrefix, type), // TODO debug this undefined ref to Intrinsics.h + llvm::Type::getVoidTy(module.getContext()), + type, + llvm::Type::getMetadataTy(module.getContext()) + ); +} +/* +llvm::FunctionCallee GazerIntrinsic::GetOrInsertOverflowCheck(llvm::Module& module, Overflow kind, llvm::Type* type) +{ + std::string name; + + switch (kind) { + case Overflow::SAdd: name = SAddNoOverflowPrefix; break; + case Overflow::UAdd: name = UAddNoOverflowPrefix; break; + case Overflow::SSub: name = SSubNoOverflowPrefix; break; + case Overflow::USub: name = USubNoOverflowPrefix; break; + case Overflow::SMul: name = SMulNoOverflowPrefix; break; + case Overflow::UMul: name = UMulNoOverflowPrefix; break; + default: + llvm_unreachable("Unknown overflow kind!"); + } + + llvm::raw_string_ostream rso(name); + type->print(rso, false, true); + rso.flush(); + + return module.getOrInsertFunction( + name, + llvm::Type::getInt1Ty(module.getContext()), + type, + type + ); +} + */ \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.h b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.h new file mode 100644 index 0000000000..daa683efe2 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Intrinsics.h @@ -0,0 +1,84 @@ 
+//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// \file This file describes the various intrinsic functions that +/// Gazer uses for instrumentation. +#ifndef _GAZER_LLVM_INSTRUMENTATION_INTRINSICS_H +#define _GAZER_LLVM_INSTRUMENTATION_INTRINSICS_H + +#include +#include + +#include + +namespace gazer { + + class GazerIntrinsic { + public: + /* + static constexpr char FunctionEntryPrefix[] = "gazer.function.entry"; + static constexpr char FunctionReturnVoidName[] = "gazer.function.return_void"; + static constexpr char FunctionCallReturnedName[] = "gazer.function.call_returned"; + static constexpr char FunctionReturnValuePrefix[] = "gazer.function.return_value."; + */ + // static constexpr char InlinedGlobalWritePrefix[] = "gazer.inlined_global.write."; // TODO debug this - undefined reference to this from Intrinsics.cpp + + /* + static constexpr char NoOverflowPrefix[] = "gazer.no_overflow"; + + static constexpr char SAddNoOverflowPrefix[] = "gazer.no_overflow.sadd."; + static constexpr char SSubNoOverflowPrefix[] = "gazer.no_overflow.ssub."; + static constexpr char SMulNoOverflowPrefix[] = "gazer.no_overflow.smul."; + static constexpr char SDivNoOverflowPrefix[] = "gazer.no_overflow.sdiv."; + + static constexpr char 
UAddNoOverflowPrefix[] = "gazer.no_overflow.uadd."; + static constexpr char USubNoOverflowPrefix[] = "gazer.no_overflow.usub."; + static constexpr char UMulNoOverflowPrefix[] = "gazer.no_overflow.umul."; + + enum class Overflow + { + SAdd, UAdd, SSub, USub, SMul, UMul + }; + */ + public: + static llvm::CallInst *CreateInlinedGlobalWrite(llvm::Value *value, llvm::DIGlobalVariable *gv); + //static llvm::CallInst* CreateFunctionEntry(llvm::Module& module, llvm::DISubprogram* dsp = nullptr); + + public: + /// Returns a 'gazer.function.entry(metadata fn_name, args...)' intrinsic. + //static llvm::FunctionCallee GetOrInsertFunctionEntry(llvm::Module& module, llvm::ArrayRef args); + + /// Returns a 'gazer.function.return_void(metadata fn_name)' intrinsic. + //static llvm::FunctionCallee GetOrInsertFunctionReturnVoid(llvm::Module& module); + + /// Returns a 'gazer.function.call_returned(metadata fn_name)' intrinsic. + //static llvm::FunctionCallee GetOrInsertFunctionCallReturned(llvm::Module& module); + + /// Returns a 'gazer.function.return_value.T(metadata fn_name, T retval)' intrinsic, + /// where 'T' is the given return type. + //static llvm::FunctionCallee GetOrInsertFunctionReturnValue(llvm::Module& module, llvm::Type* type); + + /// Returns a 'gazer.inlined_global.write.T(T value, metadata gv_name)' intrinsic. + static llvm::FunctionCallee GetOrInsertInlinedGlobalWrite(llvm::Module &module, llvm::Type *type); + + /// Returns a 'gazer.no_overflow.KIND.T(T left, T right)' intrinsic.
+ //static llvm::FunctionCallee GetOrInsertOverflowCheck(llvm::Module& module, Overflow kind, llvm::Type* type); + }; + +} + +#endif \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Passes.h b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Passes.h new file mode 100644 index 0000000000..0853cb5a0c --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/Passes.h @@ -0,0 +1,50 @@ +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#ifndef GAZER_CORE_TRANSFORM_PASSES_H +#define GAZER_CORE_TRANSFORM_PASSES_H + +#include + +namespace gazer { + +/// InlineGlobalVariables - This pass inlines all global variables into +/// the main function of the program. +// llvm::Pass *createInlineGlobalVariablesPass(); + +/// This pass combines each 'gazer.error_code' call within the function +/// into a single one. +// llvm::Pass* createLiftErrorCallsPass(llvm::Function& entry); + +/// This pass normalizes some known verifier calls into a uniform format. 
+// llvm::Pass* createNormalizeVerifierCallsPass(); + +// Added from LLVMFrontendSettings.h + enum class InlineLevel { + Off, ///< Do not inline procedures + Default, ///< Inline non-recursive, used-only-once procedures + All ///< Inline all non-recursive procedures + }; + +/// A simpler (and more restricted) inlining pass. + llvm::Pass *createSimpleInlinerPass(llvm::Function &entry, InlineLevel level); + +// llvm::Pass* createCanonizeLoopExitsPass(); + +} + +#endif diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.cpp new file mode 100644 index 0000000000..5adc8d866e --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.cpp @@ -0,0 +1,41 @@ +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// + +#include "TransformUtils.h" + +#include + +namespace gazer { + + bool isRecursive(llvm::CallGraphNode *target) { + // We wish to identify the cases of direct AND indirect static + // recursion. We do not bother with function pointers and + // external calls. 
+ auto begin = llvm::scc_begin(target); + auto end = llvm::scc_end(target); + + for (auto it = begin; it != end; ++it) { + if (it.hasCycle()) { + return true; + } + } + + return false; + } + +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.h b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.h new file mode 100644 index 0000000000..ca59482ef3 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/TransformUtils.h @@ -0,0 +1,27 @@ +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include + +namespace gazer { + +/// Returns true if the given call graph node represents a recursive function. +/// This function does not take dynamic properties (e.g. function pointers) +/// into account. + bool isRecursive(llvm::CallGraphNode *target); + +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.cpp b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.cpp new file mode 100644 index 0000000000..41eba3872a --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.cpp @@ -0,0 +1,92 @@ +// +// Created by solarowl on 4/3/21. 
+// + +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +#include "UndefToNondet.h" +#include + +#include +#include +#include +#include + +using namespace gazer; + +static llvm::FunctionCallee getUndefFunction(llvm::Type *type, llvm::Module *module) { + std::string nameBuffer; + llvm::raw_string_ostream rso(nameBuffer); + type->print(rso, false, true); + rso.flush(); + + auto name = "gazer.undef_value." + rso.str(); + + return module->getOrInsertFunction(name, llvm::FunctionType::get(type, false)); +} + +static bool replaceUndefsWithCalls(llvm::Function &function) { + bool changed = false; + auto module = function.getParent(); + + for (llvm::BasicBlock &bb : function) { + for (llvm::Instruction &inst : bb) { + for (size_t i = 0; i < inst.getNumOperands(); ++i) { + auto operand = inst.getOperand(i); + if (auto undef = llvm::dyn_cast(operand)) { + llvm::FunctionCallee func = getUndefFunction(undef->getType(), module); + + // Found an undef, insert an instruction. 
+ llvm::CallInst *call = llvm::CallInst::Create(func.getFunctionType(), func.getCallee(), "undefv"); + call->copyMetadata(inst); + + if (auto phi = llvm::dyn_cast(&inst)) { + // If the instruction is a PHI node, insert the call before the terminator of the + // corresponding predecessor block. + auto pred = phi->getIncomingBlock(i); + pred->getInstList().insert(pred->getTerminator()->getIterator(), call); + } else { + // Otherwise we can just insert it before the current instruction. + bb.getInstList().insert(inst.getIterator(), call); + } + + inst.setOperand(i, call); + + changed |= true; + } + } + } + } + + return changed; +} + +char UndefToNondetCallPass::ID; + +bool UndefToNondetCallPass::runOnModule(llvm::Module &module) { + bool changed = false; + + for (llvm::Function &function : module) { + changed |= replaceUndefsWithCalls(function); + } + + return changed; +} + + +llvm::Pass *gazer::createPromoteUndefsPass() { return new UndefToNondetCallPass(); } diff --git a/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.h b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.h new file mode 100644 index 0000000000..4b2d1d8c04 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/passes/gazer/UndefToNondet.h @@ -0,0 +1,46 @@ +// +// Created by solarowl on 4/3/21. +// + +#ifndef JNI_LIBRARY_UNDEFTONONDET_H +#define JNI_LIBRARY_UNDEFTONONDET_H + +//==-------------------------------------------------------------*- C++ -*--==// +// +// Copyright 2019 Contributors to the Gazer project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// + +#include + +namespace gazer { + +/// This pass turns undef values into nondeterministic function calls, +/// forcing the optimizer to be more careful around undefined behavior. + class UndefToNondetCallPass : public llvm::ModulePass { + public: + static char ID; + + UndefToNondetCallPass() + : ModulePass(ID) {} + + bool runOnModule(llvm::Module &module) override; + }; + + llvm::Pass *createPromoteUndefsPass(); + +} + +#endif //JNI_LIBRARY_UNDEFTONONDET_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.cpp b/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.cpp new file mode 100644 index 0000000000..1face86b45 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.cpp @@ -0,0 +1,52 @@ +// +// Created by solarowl on 3/30/21. +// + +#include "BasicBlock.h" + +// BasicBlock +int BasicBlock::nameCounter = 0; +std::unordered_map> +BasicBlock::LUT = std::unordered_map < llvm::BasicBlock *, std::shared_ptr> +(); + +void BasicBlock::reset() { + LUT = std::unordered_map < llvm::BasicBlock *, std::shared_ptr>(); +} + +void BasicBlock::init() { + initialized = true; + numOfInstructions = 0; + + instructions = std::vector < std::shared_ptr < Instruction >> (); + for (llvm::Instruction &inst : basicBlock.getInstList()) { // TODO not the nicest if-else tree in the world - restructure, when refactoring + if (inst.getOpcode() == llvm::Instruction::Call && + inst.getOperand(inst.getNumOperands() - 1)->getName().str() == "llvm.dbg.declare") { + // special case - llvm.dbg.declare call, we handle the info in it here and won't create an Instruction from it + // Instruction::handleLlvmDbgDeclare(inst); + } else if (inst.getOpcode() == llvm::Instruction::Call && + inst.getOperand(inst.getNumOperands() - 1)->getName().str() == "llvm.dbg.value") { +
// Instruction::handleLlvmDbgValue(inst); + // ignore these + } else if (inst.getOpcode() == llvm::Instruction::Call && + inst.getOperand(inst.getNumOperands() - 1)->getName().str() == "llvm.dbg.label") { + // we'll ignore these for now - if we'll ever need this, we'll get the label name out somehow here + } else { + instructions.push_back(std::make_shared(inst)); + numOfInstructions++; // easiest way to get this info seems to be counting them + } + } +} + +void BasicBlock::addToLut(llvm::BasicBlock *bbPtr, std::shared_ptr basicBlock) { + LUT[bbPtr] = basicBlock; +} + +std::shared_ptr BasicBlock::findBasicBlock(llvm::BasicBlock *key) { + auto it = LUT.find(key); + if (it == LUT.end()) { + return nullptr; + } else { + return it->second; + } +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.h b/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.h new file mode 100644 index 0000000000..849e6bfedd --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/BasicBlock.h @@ -0,0 +1,60 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#ifndef JNI_LIBRARY_BASICBLOCK_H +#define JNI_LIBRARY_BASICBLOCK_H + +#include "LlvmImports.h" + +#include +#include +#include "Instruction.h" + +class BasicBlock { +private: + bool initialized = false; + std::string name; // if unnamed, a generated (block_x) name is generated + int numOfInstructions; + std::vector > instructions; + llvm::BasicBlock &basicBlock; + static int nameCounter; + static std::unordered_map> LUT; + +public: + BasicBlock(llvm::BasicBlock &basicBlock) : basicBlock(basicBlock) { + std::string bbName = basicBlock.getName().str(); + if (bbName == "") { + this->name = "block_" + std::to_string(nameCounter); + nameCounter++; + } else { + this->name = bbName; + } + } + + static void reset(); + + void init(); + + int getNumOfInstructions() { return numOfInstructions; } + + std::string getName() { return name; } + + std::shared_ptr getInstruction(int i) { return instructions[i]; } + + bool isInitialized() { return initialized; } + + void print() { + std::cout << "BasicBlock, name: " << name << ", numOfInstructions: " << numOfInstructions << ", instructions: " + << std::endl; + for (auto inst : instructions) { inst->print(); } + std::cout << std::endl; + } + + static std::shared_ptr findBasicBlock(llvm::BasicBlock *key); + + static void addToLut(llvm::BasicBlock *bbPtr, std::shared_ptr basicBlock); +}; + + +#endif //JNI_LIBRARY_BASICBLOCK_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Function.cpp b/subprojects/frontends/llvm/src/main/cpp/types/Function.cpp new file mode 100644 index 0000000000..2744400063 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Function.cpp @@ -0,0 +1,50 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#include "Function.h" + +// Function +Function::Function(llvm::Function &llvmFunction) { + this->name = llvmFunction.getName().str(); + std::string retTypeString; + llvm::raw_string_ostream ostream(retTypeString); + llvmFunction.getReturnType()->print(ostream); + ostream.str(); + this->returnType = retTypeString; + + // parameters + for (llvm::Value ¶m : llvmFunction.args()) { + auto paramRegister = Register::createRegister(param); + parameters.push_back(paramRegister); + } + + // basic blocks + llvm::BasicBlock *llvmBb = &llvmFunction.getEntryBlock(); + while (llvmBb) { + auto newBasicBlock = std::make_shared(*llvmBb); + BasicBlock::addToLut(llvmBb, newBasicBlock); + llvmBb = llvmBb->getNextNode(); + } + + // basic blocks + llvmBb = &llvmFunction.getEntryBlock(); + while (llvmBb) { + addBasicBlock(*llvmBb); + llvmBb = llvmBb->getNextNode(); + } +} + +void Function::addBasicBlock(llvm::BasicBlock &llvmBasicBlock) { + // check, if it was already created + auto bbFromLut = BasicBlock::findBasicBlock(&llvmBasicBlock); + if (bbFromLut != nullptr) { + if (!bbFromLut->isInitialized()) bbFromLut->init(); + basicBlocks.push_back(bbFromLut); + } else { + auto newBasicBlock = std::make_shared(llvmBasicBlock); + basicBlocks.push_back(newBasicBlock); + BasicBlock::addToLut(&llvmBasicBlock, newBasicBlock); + newBasicBlock->init(); + } +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Function.h b/subprojects/frontends/llvm/src/main/cpp/types/Function.h new file mode 100644 index 0000000000..e9b5eb8d85 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Function.h @@ -0,0 +1,60 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#ifndef JNI_LIBRARY_FUNCTION_H +#define JNI_LIBRARY_FUNCTION_H + +#include "LlvmImports.h" + +#include +#include "BasicBlock.h" + +// Format: Tuple3[0..*]> +class Function { +private: + std::string name; + std::string returnType; + std::vector > parameters; + + std::vector > basicBlocks; +public: + Function(llvm::Function &newFunction); + + void addBasicBlock(llvm::BasicBlock &llvmBasicBlock); + + std::string getName() { return name; } + + std::string getReturnType() { return returnType; } + + int getNumOfParameters() { return parameters.size(); } + + std::shared_ptr getParameter(int paramIndex) { return parameters[paramIndex]; } + + int getNumOfBasicBlocks() { return basicBlocks.size(); } + + std::shared_ptr getBasicBlock(int basicBlockIndex) { return basicBlocks[basicBlockIndex]; } + + int findBasicBlockByName(std::string basicBlockName) { // Find the index of the function given with functionName + auto it = std::find_if(std::begin(basicBlocks), std::end(basicBlocks), + [&](std::shared_ptr const &b) { return b->getName() == basicBlockName; }); + if (it == std::end(basicBlocks)) { + return -1; + } else return it - basicBlocks.begin(); + } + + void print() { + std::cout << "Function, name: " << name << ", returnType: " << returnType << ", Parameters: " << std::endl; + for (auto param : parameters) { + param->print(); + } + std::cout << ", BasicBlocks: "; + for (auto bb : basicBlocks) { + bb->print(); + } + std::cout << std::endl; + } +}; + + +#endif //JNI_LIBRARY_FUNCTION_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.cpp b/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.cpp new file mode 100644 index 0000000000..25d5362305 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.cpp @@ -0,0 +1,20 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#include "GlobalVariable.h" + +// GlobalVariable +GlobalVariable::GlobalVariable(llvm::GlobalVariable &llvmGlobalVar) { + this->name = llvmGlobalVar.getName().str(); + + llvm::raw_string_ostream ostream(type); + llvmGlobalVar.getValueType()->print(ostream); + ostream.str(); + + llvm::raw_string_ostream ostream2(initialValue); + llvmGlobalVar.getInitializer()->print(ostream2); + ostream2.str(); + + auto gvPointerRegister = Register::createRegister(llvmGlobalVar); +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.h b/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.h new file mode 100644 index 0000000000..3040060c69 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/GlobalVariable.h @@ -0,0 +1,35 @@ +// +// Created by solarowl on 3/30/21. +// + +#ifndef JNI_LIBRARY_GLOBALVARIABLE_H +#define JNI_LIBRARY_GLOBALVARIABLE_H + +#include "LlvmImports.h" + +#include +#include "operands/Register.h" + +class GlobalVariable { +private: + std::string name; + std::string type; + std::string initialValue; // e.g. int glob2 = 11 -> i32 11 +public: + GlobalVariable(llvm::GlobalVariable &llvmGlobalVar); + + std::string getName() { return name; } + + std::string getType() { return type; } + + std::string getInitialValue() { return initialValue; } + + void print() { + std::cout << "GlobalVariable:" << std::endl << "GlobalVar name: " << name << std::endl; + std::cout << "GlobalVar type: " << type << std::endl; + std::cout << "GlobalVar initial value: " << initialValue << std::endl; + } +}; + + +#endif //JNI_LIBRARY_GLOBALVARIABLE_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Instruction.cpp b/subprojects/frontends/llvm/src/main/cpp/types/Instruction.cpp new file mode 100644 index 0000000000..b1f728eb69 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Instruction.cpp @@ -0,0 +1,294 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#include "Instruction.h" +#include "BasicBlock.h" // needs to be here due to dependency cycle +#include + +// Instruction +Instruction::Instruction(llvm::Instruction &inst) { + // a bit of analysis first + Analysis::checkInstruction(&inst); + + // opcode + opcode = inst.getOpcode(); + + // opname + opname = inst.getOpcodeName(); + // How to get name of opcode: std::cout<<"Haliho "<getTypeID() != llvm::Type::VoidTyID) { // Not void type + retVariable = Register::createRegister(llvm::cast(inst)); + } else { + retVariable = nullptr; + } + + // line number + lineNumber = -1; + auto &location = inst.getDebugLoc(); + if (location) { + lineNumber = location.getLine(); + } + + // if it is a load or store, add metadata about atomicity + handleOrderingString(inst); + // handle icmp/fcmp and PHI node (it is not part of operandHandler, as it is a special instruction, not an operand + handleCmp(inst); + if (handlePhiNode(inst)) { + return; + } + if(handleConditionalBr(inst)) { + return; + } + + // operands + for (unsigned int i = 0; i < inst.getNumOperands(); i++) { + llvm::Value *operand = inst.getOperand(i); + + OperandHandler oph{}; + // "cheap/simple" chain of responsibility + if (oph.handleInstruction(operand, *this)); + else if (oph.handleBasicBlock(operand, *this)); + else if (oph.handleFunction(operand, *this)); + else if (oph.handleConstant(operand, *this)); + else if (oph.handleFunctionArgument(operand, *this)); + else { + // Operand type unknown - so it's an error + std::cerr << "Unknown value type as right hand operand in instruction!" 
<< std::endl; + std::cerr << "Register LUT: "; + Register::printLUT(); + std::cerr << "Pointer: " << operand << std::endl; + operand->print(llvm::errs()); + std::cout << std::endl; + std::cerr << "Instruction: " << std::endl; + inst.print(llvm::errs()); + std::cerr << std::endl; + abort(); + } + + } + +} + +bool Instruction::handleConditionalBr(llvm::Instruction &br) { + if((std::string)(br.getOpcodeName())=="br" && br.getNumOperands()==3) { + OperandHandler oph{}; + + if (oph.handleInstruction(br.getOperand(0), *this)); + else if (oph.handleConstant(br.getOperand(0), *this)); + + oph.handleBasicBlock(br.getOperand(2), *this); + oph.handleBasicBlock(br.getOperand(1), *this); + return true; + } else { + return false; + } +} + +void Instruction::handleLlvmDbgValue(llvm::Instruction &call) { + handleLlvmDbgDeclare(call); // works the same way +} + +void Instruction::handleLlvmDbgDeclare(llvm::Instruction &call) { + llvm::Value *var = call.getOperand( + 1); // operand 1 is the metadata with the local variable name (the name from the C code) + llvm::Value *_register = call.getOperand(0); // operand 0 is the metadata with the Register + + llvm::DILocalVariable *metaVarName = llvm::dyn_cast(llvm::dyn_cast( + var)->getMetadata()); // cast it from value to metadata with the wrapper & get the metadata out + + llvm::Metadata *registerMetadata = llvm::dyn_cast(_register)->getMetadata(); + llvm::LocalAsMetadata *metaRegister = llvm::dyn_cast( + registerMetadata); // cast it from value to metadata with the wrapper & get the metadata out + + std::shared_ptr registerFromMetadata = nullptr; + if (metaRegister != nullptr) { + registerFromMetadata = Register::findRegister(metaRegister->getValue()); + } + // in theory, we probably won't have both a dbg declare and value call, but we'll check none the less, if loc var name wasn't assigned already + if (registerFromMetadata != nullptr && registerFromMetadata->getLocalVariableName()=="" ) { + 
registerFromMetadata->assignLocalVariable(metaVarName->getName().str()); + } else { + // std::cout << "Warning: Register in llvm.dbg.declare/.value metadata not found!" << std::endl; + // not an error, the register is probably an undef, we just won't use it + } +} + +void +Instruction::changeOperand(int operandIndex, ConstValue operand) { // Copies the operand to the given index in operands + operands[operandIndex] = std::make_shared(operand); +} + +bool Instruction::handleCmp(llvm::Instruction &inst) { + if (opcode == llvm::Instruction::ICmp || opcode == llvm::Instruction::FCmp) { + llvm::CmpInst *cmpInst = llvm::dyn_cast(&inst); + operands.push_back(std::make_shared( + llvm::CmpInst::getPredicateName(cmpInst->getPredicate()).str())); + return true; + } else { + return false; + } +} + +bool Instruction::handlePhiNode(llvm::Instruction &inst) { + if (opcode == llvm::Instruction::PHI) { + llvm::PHINode *phiNode = llvm::dyn_cast(&inst); + unsigned int numParams = phiNode->getNumIncomingValues(); + for (unsigned int i = 0; i < numParams; ++i) { + llvm::Value *value = phiNode->getIncomingValue(i); + llvm::BasicBlock *block = phiNode->getIncomingBlock(i); + + if (llvm::dyn_cast(value) != nullptr) { + operands.push_back(std::make_shared(*value)); + } else { + // it must be a register then + // if it is an already known register, create reg will just return it from lut + // if not, it creates a new register and will handle it, when it pops up later on the left side of an instruction + operands.push_back(Register::createRegister(*value)); + } /* + else { + std::cerr << "Phi node value type not known!" 
<< block << std::endl; + value->print(llvm::errs()); + abort(); + } + */ + std::shared_ptr bb = BasicBlock::findBasicBlock(block); + if (bb == nullptr) { // the basic block does not exist yet + // create the basic block (it's not added to it's containing function yet though - that will happen, when the parsing finds it) + // (it won't create another, as it checks for this) + auto newBasicBlock = std::make_shared(*block); + BasicBlock::addToLut(block, newBasicBlock); + // newBasicBlock->init(); + operands.push_back(std::make_shared( + newBasicBlock->getName())); // the constant value name will be the generated name of the basic block + } else { + operands.push_back(std::make_shared( + bb->getName())); // the constant value nam will be the looked up name + } + + } + return true; + } else { + return false; + } +} + +/** + * If the instruction is a store or a load, adds one or two string operands (they should be the first operands), + * namely atomic/volatile/non-atomic and in the atomic case the ordering as well + * + * @param inst LLVM instruction + * @return true if inst is a store or load, false otherwise + */ +bool Instruction::handleOrderingString(llvm::Instruction &inst) { + std::string isAtomic, ordering = ""; + if(llvm::dyn_cast(&inst)!=nullptr) { + llvm::StoreInst* store = llvm::dyn_cast(&inst); + if(store->isAtomic()) { // atomic + isAtomic = "atomic"; + ordering = llvm::toIRString(store->getOrdering()); + } else if(store->isVolatile()) { // volatile + isAtomic = "volatile"; + } else { // not-atomic + isAtomic = "non-atomic"; + } + this->operands.push_back(std::make_shared(isAtomic)); + if(ordering!="") { // atomic case + this->operands.push_back(std::make_shared(ordering)); + } + return true; + } else if(llvm::dyn_cast(&inst)!=nullptr) { + llvm::LoadInst* load = llvm::dyn_cast(&inst); + if(load->isAtomic()) { // atomic + isAtomic = "atomic"; + ordering = llvm::toIRString(load->getOrdering()); + } else if(load->isVolatile()) { // volatile + isAtomic = 
"volatile"; + } else { // not-atomic + isAtomic = "non-atomic"; + } + this->operands.push_back(std::make_shared(isAtomic)); + if(ordering!="") { // atomic case + this->operands.push_back(std::make_shared(ordering)); + } + return true; + } else { + return false; + } +} + +// OperandHandler + +bool Instruction::OperandHandler::handleInstruction(llvm::Value *operand, Instruction &inst) { + if (llvm::dyn_cast(operand) != nullptr + || llvm::dyn_cast(operand) != nullptr) { // the operand is a register, whether it exists yet or not + inst.operands.push_back(Register::createRegister(*operand)); + return true; + } else { + return false; + } +} + +bool Instruction::OperandHandler::handleBasicBlock(llvm::Value *operand, Instruction &inst) { + llvm::BasicBlock *llvmBbOperand; + + if ((llvmBbOperand = llvm::dyn_cast(operand)) != nullptr) { // the operand is a basic block + // if it's a basic block, add it to the LUT (if it isn't already there) + std::shared_ptr bb = BasicBlock::findBasicBlock(llvmBbOperand); + if (bb == nullptr) { // the basic block does not exist yet + // create the basic block (it's not added to it's containing function yet though - that will happen, when the parsing finds it) + // (it won't create another, as it checks for this) + auto newBasicBlock = std::make_shared(*llvmBbOperand); + BasicBlock::addToLut(llvmBbOperand, newBasicBlock); + // newBasicBlock->init(); + inst.operands.push_back(std::make_shared( + newBasicBlock->getName())); // the constant value name will be the generated name of the basic block + } else { + inst.operands.push_back(std::make_shared( + bb->getName())); // the constant value nam will be the looked up name + } + return true; + } else { + return false; + } +} + +bool Instruction::OperandHandler::handleFunction(llvm::Value *operand, Instruction &inst) { + llvm::Function *llvmFunctionOperand; + operand = operand->stripPointerCasts(); + if ((llvmFunctionOperand = llvm::dyn_cast(operand)) != + nullptr) { // the operand is a function 
(i.e. in a call operation) + inst.operands.push_back(std::make_shared( + llvmFunctionOperand->getName().str())); // the constant value name will be the name of the function + return true; + } else { + return false; + } +} + +bool Instruction::OperandHandler::handleConstant(llvm::Value *operand, Instruction &inst) { + if (llvm::dyn_cast(operand) != + nullptr) { // the operand is a constant (AND not a function - that is also a Constant type in llvm!) + if (llvm::dyn_cast(operand) != nullptr) { + llvm::GEPOperator *gep = llvm::dyn_cast(operand); + inst.operands.push_back(Register::findRegister(gep->getPointerOperand())); + } else { + inst.operands.push_back(std::make_shared(*operand)); + } + return true; + } else { + return false; + } +} + +bool Instruction::OperandHandler::handleFunctionArgument(llvm::Value *operand, Instruction &inst) { + if (llvm::dyn_cast(operand) != nullptr) { + // it should be in the register LUT at this point + inst.operands.push_back(Register::findRegister(operand)); + return true; + } else { + return false; + } +} diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Instruction.h b/subprojects/frontends/llvm/src/main/cpp/types/Instruction.h new file mode 100644 index 0000000000..2d18bc90a5 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Instruction.h @@ -0,0 +1,76 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#ifndef JNI_LIBRARY_INSTRUCTION_H +#define JNI_LIBRARY_INSTRUCTION_H + +#include + +#include "LlvmImports.h" +#include "operands/Register.h" +#include "operands/Operand.h" +#include "operands/ConstValue.h" +#include "operands/BasicBlockOperand.h" +#include "operands/StringOperand.h" +#include "../utilities/Analysis.h" + +// Format: Tuple4[0..*], lineNumber> +class Instruction { +private: + unsigned int opcode; // llvm opcode + std::string opname; + std::vector > operands; + std::shared_ptr retVariable; // some operations have one, some don't + int lineNumber; + + bool handlePhiNode(llvm::Instruction &inst); + bool handleCmp(llvm::Instruction &inst); + bool handleConditionalBr(llvm::Instruction &br); + bool handleOrderingString(llvm::Instruction &inst); + + class OperandHandler { + public: + bool handleInstruction(llvm::Value *operand, Instruction &inst); + + bool handleBasicBlock(llvm::Value *operand, Instruction &inst); + + bool handleFunction(llvm::Value *operand, Instruction &inst); + + bool handleConstant(llvm::Value *operand, Instruction &inst); + + bool handleFunctionArgument(llvm::Value *operand, Instruction &inst); + }; + +public: + Instruction(llvm::Instruction &inst); + + void print() { + std::cout << "Instruction " << opname << ", opcode: " << opcode << " lineNumber: " << lineNumber + << ", retVariable: "; + if (retVariable)retVariable->print(); + std::cout << ", Operands: " << std::endl; + for (auto op : operands) { op->print(); } + std::cout << std::endl; + } + + int getNumOfOperands() { return operands.size(); } + + int getOpcode() { return opcode; } + + std::string getOpname() { return opname; } + + int getLineNumber() { return lineNumber; } + + std::shared_ptr getRetVariable() { return retVariable; } + + std::shared_ptr getOperand(int operandIndex) { return operands[operandIndex]; } + + void changeOperand(int operandIndex, + ConstValue operand); // Copies the operand to the given index in operands, used to change gazer's metadata call's 
operands + + static void handleLlvmDbgDeclare(llvm::Instruction &call); + static void handleLlvmDbgValue(llvm::Instruction &call); +}; + +#endif //JNI_LIBRARY_INSTRUCTION_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/LlvmImports.h b/subprojects/frontends/llvm/src/main/cpp/types/LlvmImports.h new file mode 100644 index 0000000000..9e3f0feb85 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/LlvmImports.h @@ -0,0 +1,40 @@ +// +// Created by solarowl on 4/2/21. +// + +#ifndef JNI_LIBRARY_LLVMIMPORTS_H +#define JNI_LIBRARY_LLVMIMPORTS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "llvm/IR/Operator.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif //JNI_LIBRARY_LLVMIMPORTS_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Module.cpp b/subprojects/frontends/llvm/src/main/cpp/types/Module.cpp new file mode 100644 index 0000000000..a3258302bc --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Module.cpp @@ -0,0 +1,79 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#include "Module.h" + +// Global context +llvm::LLVMContext context; + +// Module +Module Module::instance; + +bool Module::checkName(llvm::Function &newFunction) { + std::string newFuncName = newFunction.getName().str(); + for (std::shared_ptr f : functions) { + if (f->getName() == newFuncName) { + return true; + } + } + return false; +} + +bool Module::checkName(llvm::GlobalVariable &newGlobalVar) { + std::string newGlobalVarName = newGlobalVar.getName().str(); + for (std::shared_ptr g : globalVariables) { + if (g->getName() == newGlobalVarName) { + return true; + } + } + return false; +} + +// Adding function based on the llvm::Function type +void Module::addFunction(llvm::Function &newFunction) { + if (checkName(newFunction)) { + std::cout << "Function " << newFunction.getName().str() << "was already added to module!" << std::endl; + } else { + functions.push_back(std::make_shared(newFunction)); + } + // numOfFunctions++; +} + +// Adding global variable based on the llvm::GlobalVariable type +void Module::addGlobalVariable(llvm::GlobalVariable &newGlobalVar) { + if (checkName(newGlobalVar)) { + std::cout << "GlobalVariable " << newGlobalVar.getName().str() << "was already added to module!" 
<< std::endl; + } else { + globalVariables.push_back(std::make_shared(newGlobalVar)); + } +} + + +Module::Module() { + functions = std::vector < std::shared_ptr < Function >> (); + globalVariables = std::vector < std::shared_ptr < GlobalVariable >> (); +} + +void Module::parseLLVMModule(std::shared_ptr llvmModule) { + instance = Module(); + Analysis::reset(); + BasicBlock::reset(); + Register::reset(); + + // Analyze module first + Analysis::checkModule(llvmModule.get()); + + auto &globalList = llvmModule->getGlobalList(); + for (llvm::GlobalVariable &globalVar : globalList) { + addGlobalVariable(globalVar); + } + + auto &llvmFunctionList = llvmModule->getFunctionList(); + for (llvm::Function &llvmFunction : llvmFunctionList) { + //if(llvmFunction.getBasicBlockList().size() > 0) { + if (!llvmFunction.isDeclaration()) { + addFunction(llvmFunction); + } + } +} diff --git a/subprojects/frontends/llvm/src/main/cpp/types/Module.h b/subprojects/frontends/llvm/src/main/cpp/types/Module.h new file mode 100644 index 0000000000..afdc1b00ad --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/Module.h @@ -0,0 +1,74 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#ifndef JNI_LIBRARY_MODULE_H +#define JNI_LIBRARY_MODULE_H + +#include "LlvmImports.h" + +#include +#include "Function.h" +#include "BasicBlock.h" +#include "operands/Register.h" +#include "../utilities/Analysis.h" +#include "GlobalVariable.h" + +// Global context +extern llvm::LLVMContext context; + +class Module { // Singleton +private: + std::vector > functions; + std::vector > globalVariables; + + bool checkName(llvm::Function &newFunction); + + bool checkName(llvm::GlobalVariable &newGlobalVar); + + // Adding function based on the llvm::Function type + void addFunction(llvm::Function &newFunction); + + void addGlobalVariable(llvm::GlobalVariable &newGlobalVar); + + Module(); + + static Module instance; +public: + static Module &getModule() { + return instance; + } + + void parseLLVMModule(std::shared_ptr llvmModule); + + int getNumOfFunctions() { return functions.size(); } + + int getNumOfGlobalVariables() { return globalVariables.size(); } + + std::shared_ptr getFunction(int functionIndex) { return functions[functionIndex]; } + + int findFunctionByName(std::string functionName) { // Find the index of the function given with functionName + auto it = std::find_if(std::begin(functions), std::end(functions), + [&](std::shared_ptr const &f) { return f->getName() == functionName; }); + if (it == std::end(functions)) { + return -1; + } else return it - functions.begin(); + } + + std::shared_ptr getGlobalVariable(int gvIndex) { return globalVariables[gvIndex]; } + + void print() { + std::cout << "Module, numOfFunctions: " << functions.size() << ", functions: "; + for (auto func : functions) { + func->print(); + } + std::cout << ", global variables: " << std::endl; + for (auto gv : globalVariables) { + gv->print(); + } + std::cout << std::endl; + } +}; + + +#endif //JNI_LIBRARY_MODULE_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.cpp b/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.cpp new file 
mode 100644 index 0000000000..695ac05a5a --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.cpp @@ -0,0 +1,9 @@ +// +// Created by solarowl on 4/11/21. +// + +#include "BasicBlockOperand.h" + +BasicBlockOperand::BasicBlockOperand(std::string _name) { + name = "label " + _name; +} diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.h b/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.h new file mode 100644 index 0000000000..ea98422ed3 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/BasicBlockOperand.h @@ -0,0 +1,21 @@ +// +// Created by solarowl on 4/11/21. +// + +#ifndef JNI_LIBRARY_BASICBLOCKLABEL_H +#define JNI_LIBRARY_BASICBLOCKLABEL_H + +#include "Operand.h" + +class BasicBlockOperand : public Operand { +public: + std::string getType() override { return "constant"; } + + BasicBlockOperand(std::string _name); + + void print() override { std::cout << "BasicBlockOperand, name: " << name << std::endl; } + +}; + + +#endif //JNI_LIBRARY_BASICBLOCKLABEL_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.cpp b/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.cpp new file mode 100644 index 0000000000..cec4c2011a --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.cpp @@ -0,0 +1,36 @@ +// +// Created by solarowl on 3/30/21. 
+// + +#include "ConstValue.h" +#include +#include +#include + +ConstValue::ConstValue(llvm::Value &llvmConstValue) { + llvm::raw_string_ostream nameStream(name); + llvmConstValue.print(nameStream); // + nameStream.str(); + + // function pointers can have whitespaces in their type names, which we do not handle, so we change them to _ + // the last whitespace should remain though, as that is the delimiter between the type and the function name + std::vector words; + std::stringstream check1(name); + std::string intermediate; + + // Tokenizing w.r.t. space ' ' + while(getline(check1, intermediate, ' ')) + { + words.push_back(intermediate); + } + + std::ostringstream os; + if(words.size()>2) { + for(unsigned int i = 0; i < words.size()-1; i++) + os << words[i] << "_"; + os << " " << words[words.size()-1]; + name = os.str(); + + } // else we change nothing in the name + +} \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.h b/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.h new file mode 100644 index 0000000000..65612a3232 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/ConstValue.h @@ -0,0 +1,23 @@ +// +// Created by solarowl on 3/30/21. +// + +#ifndef JNI_LIBRARY_CONSTVALUE_H +#define JNI_LIBRARY_CONSTVALUE_H + +#include "../LlvmImports.h" +#include +#include "Operand.h" + +class ConstValue : public Operand { + // name is e.g. 
"i32 2" +public: + std::string getType() override { return "constant"; } + + ConstValue(llvm::Value &llvmConstValue); + + void print() override { std::cout << "Constant, name: " << name << std::endl; } +}; + + +#endif //JNI_LIBRARY_CONSTVALUE_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.cpp b/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.cpp new file mode 100644 index 0000000000..9d9a97f810 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.cpp @@ -0,0 +1,8 @@ +// +// Created by solarowl on 3/30/21. +// + +#include "Operand.h" + +// Operands +int Operand::nameCounter = 0; \ No newline at end of file diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.h b/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.h new file mode 100644 index 0000000000..64d16c9cdd --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/Operand.h @@ -0,0 +1,23 @@ +// +// Created by solarowl on 3/30/21. +// + +#ifndef JNI_LIBRARY_OPERAND_H +#define JNI_LIBRARY_OPERAND_H + +#include "../LlvmImports.h" +#include + +class Operand { +protected: + static int nameCounter; + std::string name; +public: + virtual std::string getType() = 0; + + virtual std::string getName() { return name; } + + virtual void print() = 0; // for debugging purposes +}; + +#endif //JNI_LIBRARY_OPERAND_H diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.cpp b/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.cpp new file mode 100644 index 0000000000..96ea0cc7f6 --- /dev/null +++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.cpp @@ -0,0 +1,88 @@ +// +// Created by solarowl on 3/30/21. 
+//
+
+#include "Register.h"
+
+// Lookup table shared by all registers: LLVM value address -> the Register wrapping it.
+std::unordered_map<llvm::Value *, std::shared_ptr<Register>> Register::LUT;
+
+// Forgets every register created so far.
+void Register::reset() {
+    LUT.clear();
+}
+
+// Wraps an LLVM value. Unnamed values get a generated "register_<counter>" name;
+// named values keep their LLVM name without a leading '@'.
+// `name` and `nameCounter` are not declared in this file (presumably inherited from Operand).
+Register::Register(llvm::Value &llvmRegister) {
+    if (llvmRegister.getName() == "") {
+        name = "register_" + std::to_string(nameCounter);
+    } else {
+        name = llvmRegister.getName().str();
+        if (name.find("@") == 0) { // if it is a pointer to a global var, then it starts with @
+            name = name.substr(1); // but we'll remove @
+        }
+    }
+    nameCounter++;
+    localVariableName = nullptr;
+
+    // Render the LLVM type into its textual form.
+    std::string typeStr;
+    llvm::raw_string_ostream ostream(typeStr);
+    llvmRegister.getType()->print(ostream);
+    ostream.str(); // flushes the stream into typeStr
+
+    this->type = typeStr;
+
+    registerAddress = &llvmRegister;
+}
+
+// for global var reg init
+// Unlike the llvm::Value constructor, this keeps the name as reported by LLVM
+// and does not touch nameCounter.
+Register::Register(llvm::GlobalVariable &globalVariable) {
+    localVariableName = nullptr;
+    llvm::raw_string_ostream gvTypeStream(this->type);
+    globalVariable.getType()->print(gvTypeStream);
+    gvTypeStream.str(); // flushes the stream into this->type
+    this->name = globalVariable.getName().str();
+    this->registerAddress = &globalVariable;
+}
+
+// Stores the register in the LUT under its LLVM value address, replacing any previous entry.
+void Register::addToLut(std::shared_ptr<Register> newRegister) {
+    LUT[newRegister->getRegisterAddress()] = newRegister;
+}
+
+// Returns the register already known for this value, or creates and caches a new one.
+std::shared_ptr<Register> Register::createRegister(llvm::Value &llvmRegister) {
+    std::shared_ptr<Register> regFromLut = findRegister(&llvmRegister);
+    if (regFromLut != nullptr) {
+        return regFromLut;
+    }
+    // The constructor is private, so std::make_shared cannot be used here.
+    std::shared_ptr<Register> newReg(new Register(llvmRegister));
+    addToLut(newReg);
+    return newReg;
+}
+
+// Same as above, but uses the global-variable constructor for new entries.
+std::shared_ptr<Register> Register::createRegister(llvm::GlobalVariable &globalVariable) {
+    std::shared_ptr<Register> regFromLut = findRegister(&globalVariable);
+    if (regFromLut != nullptr) {
+        return regFromLut;
+    }
+    std::shared_ptr<Register> newReg(new Register(globalVariable));
+    addToLut(newReg);
+    return newReg;
+}
+
+// Attaches a source-level variable name to the register and marks its type as "[local]".
+// A register can be assigned only once; later attempts are reported and ignored.
+void Register::assignLocalVariable(std::string varName) {
+    if (localVariableName != nullptr) {
+        // Dereference the pointer: streaming .get() would print the address, not the name.
+        std::cout << "Variable " << *localVariableName
+                  << " was already assigned to register, can't assign "
+                  << varName << std::endl;
+        return;
+    }
+    localVariableName = std::unique_ptr<std::string>(new std::string(varName));
+    type += " [local]";
+}
+
+// Looks up the register for an LLVM value; returns nullptr when none was created yet.
+std::shared_ptr<Register> Register::findRegister(llvm::Value *key) {
+    auto it = LUT.find(key);
+    if (it == LUT.end()) {
+        return nullptr;
+    }
+    return it->second;
+}
\ No newline at end of file
diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.h b/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.h
new file mode 100644
index 0000000000..1905728cee
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/Register.h
@@ -0,0 +1,75 @@
+//
+// Created by solarowl on 3/30/21.
+//
+
+#ifndef JNI_LIBRARY_REGISTER_H
+#define JNI_LIBRARY_REGISTER_H
+
+#include "../LlvmImports.h"
+
+// NOTE(review): the two standard header names below were lost in extraction; these are the
+// headers the class visibly needs (smart pointers, hash map) - confirm against the repository.
+#include <memory>
+#include <unordered_map>
+#include "Operand.h"
+
+// Operand backed by an LLVM value. Instances are created only through createRegister(),
+// which keeps exactly one Register per LLVM value in a static lookup table.
+class Register : public Operand {
+    std::unique_ptr<std::string> localVariableName; // TODO should be optional instead of u_ptr?
+    llvm::Value *registerAddress;
+    std::string type;
+    // llvm::Type* type; // TODO could be a string only?
+
+    static std::unordered_map<llvm::Value *, std::shared_ptr<Register>> LUT;
+
+    Register(llvm::Value &llvmRegister);
+
+    Register(llvm::GlobalVariable &globalVariable);
+
+    static void addToLut(std::shared_ptr<Register> newRegister);
+
+public:
+    static std::shared_ptr<Register>
+    createRegister(llvm::Value &llvmRegister); // creates the register and adds it to LUT / finds it in LUT
+    static std::shared_ptr<Register> createRegister(llvm::GlobalVariable &globalVariable);
+
+    void assignLocalVariable(std::string varName);
+
+    // Returns the assigned local variable name, or an empty string when none is assigned.
+    std::string getLocalVariableName() {
+        if (localVariableName == nullptr) {
+            return "";
+        } else {
+            return *localVariableName;
+        }
+    }
+
+    std::string getName() override {
+        // if the register has a local var name assigned,
+        // we return that, instead of the generated name
+        if (localVariableName != nullptr) {
+            return *localVariableName;
+        } else {
+            return name;
+        }
+    }
+
+    llvm::Value *getRegisterAddress() { return registerAddress; }
+
+    std::string getType() override { return type; }
+
+    // Debug dump of this register to stdout.
+    void print() override {
+        std::cout << "Register, name: " << name << ", RegisterAddress: " << registerAddress;
+        std::cout << ", type: " << getType();
+        if (localVariableName != nullptr) std::cout << ", LocalVarName: " << *localVariableName;
+        std::cout << std::endl;
+    }
+
+    // Debug dump of every register in the lookup table.
+    static void printLUT() {
+        std::cout << "Register LUT content: " << std::endl;
+        for (auto &it: LUT) {
+            it.second->print();
+        }
+    }
+
+    static void reset();
+
+    static std::shared_ptr<Register> findRegister(llvm::Value *key);
+};
+
+#endif //JNI_LIBRARY_REGISTER_H
diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.cpp b/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.cpp
new file mode 100644
index 0000000000..8eb49a0ed8
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.cpp
@@ -0,0 +1,9 @@
+//
+// Created by solarowl on 4/11/21.
+//
+
+#include "StringOperand.h"
+
+// Builds the operand name by prefixing the given text with "meta ".
+StringOperand::StringOperand(std::string opName) {
+    name = "meta " + opName;
+}
\ No newline at end of file
diff --git a/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.h b/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.h
new file mode 100644
index 0000000000..1c09526f67
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/types/operands/StringOperand.h
@@ -0,0 +1,21 @@
+//
+// Created by solarowl on 4/11/21.
+//
+
+// NOTE(review): the guard macro is named after "FUNCTIONLABEL", not this file;
+// confirm no other header uses the same macro.
+#ifndef JNI_LIBRARY_FUNCTIONLABEL_H
+#define JNI_LIBRARY_FUNCTIONLABEL_H
+
+#include "Operand.h"
+
+// Operand that carries only a piece of text (stored as "meta <text>" in `name`).
+class StringOperand : public Operand {
+public:
+    // Always reports the fixed type string "constant".
+    std::string getType() override { return "constant"; }
+
+    StringOperand(std::string opName);
+
+    // Debug dump of this operand to stdout.
+    void print() override { std::cout << "StringOperand, name: " << name << std::endl; }
+
+};
+
+
+#endif //JNI_LIBRARY_FUNCTIONLABEL_H
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.cpp b/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.cpp
new file mode 100644
index 0000000000..53f4119df8
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.cpp
@@ -0,0 +1,104 @@
+//
+// Created by solarowl on 4/19/21.
+//
+
+#include "Analysis.h"
+
+bool Analysis::hasIntInBitwiseBinaryOp = false;
+bool Analysis::hasStructs = false;
+
+namespace {
+
+// True when the first operand is an integer wider than one bit (i.e. not an i1 boolean).
+// all operands should be of the same type, according to llvm ref manual, so we only need to check one
+bool hasWideIntegerOperand(const llvm::BinaryOperator *binOp) {
+    auto *intType = llvm::dyn_cast<llvm::IntegerType>(binOp->getOperand(0)->getType());
+    return intType != nullptr && intType->getBitWidth() > 1;
+}
+
+// True when every user of the instruction is an icmp whose second operand is a constant zero.
+// An instruction without users also yields true.
+bool isOnlyComparedToZero(llvm::Instruction *instruction) {
+    for (auto user : instruction->users()) {
+        auto *icmp = llvm::dyn_cast<llvm::ICmpInst>(user);
+        if (icmp == nullptr) {
+            return false; // used by something other than an icmp
+        }
+        auto *constCmpOperand = llvm::dyn_cast<llvm::Constant>(icmp->getOperand(1));
+        if (constCmpOperand == nullptr || !constCmpOperand->isZeroValue()) {
+            return false; // compared against something other than a constant zero
+        }
+    }
+    return true;
+}
+
+} // namespace
+
+// Per-instruction hook: updates the bitwise-arithmetic flag.
+void Analysis::checkInstruction(llvm::Instruction* instruction) {
+    checkIfIntInBitwiseBinary(instruction);
+}
+
+// Per-module hook: updates the struct flag.
+void Analysis::checkModule(const llvm::Module* module) {
+    checkIfHasStructs(module);
+}
+
+// Sets hasStructs when the module contains at least one struct type.
+void Analysis::checkIfHasStructs(const llvm::Module* module) {
+    llvm::TypeFinder StructTypes;
+    StructTypes.run(*module, true); // TODO we do not check for opaque struct - we do not handle them yet
+
+    if (StructTypes.begin() != StructTypes.end()) {
+        // std::cout << "there are structs" << std::endl;
+        hasStructs = true;
+    }
+
+    // debug print of struct types
+    // for (auto *STy : StructTypes)
+    //     STy->print(llvm::errs());
+}
+
+// Sets hasIntInBitwiseBinaryOp when the instruction is a bitwise binary operation on
+// non-boolean integers that cannot be handled with integer arithmetic.
+// (example: | (bitwise or -> need integers) vs || (logical or, works with bools)
+void Analysis::checkIfIntInBitwiseBinary(llvm::Instruction* instruction) {
+    llvm::BinaryOperator *binOp = llvm::dyn_cast<llvm::BinaryOperator>(instruction);
+    // 1. if the flag is already true, the test is superfluous
+    // 2. only binary operators are of interest
+    if (hasIntInBitwiseBinaryOp || binOp == nullptr) {
+        return;
+    }
+
+    if (checkIfAndOrXor(binOp)) {
+        // if it is one of and,or,xor and is only used by icmp 0 compares, then int arith is still alright
+        // (the optimizations, mainly CFG simplifications, can create i32 binary ops with icmps from logical ops)
+        if (hasWideIntegerOperand(binOp) && !isOnlyComparedToZero(instruction)) {
+            hasIntInBitwiseBinaryOp = true;
+            // std::cout << "Found one!" << std::endl;
+            binOp->print(llvm::errs());
+        }
+    } else if (checkIfShift(binOp)) {
+        // in this case, on the other hand, shifts on integers will always need integer arithmetics
+        if (hasWideIntegerOperand(binOp)) {
+            hasIntInBitwiseBinaryOp = true;
+        }
+    }
+}
+
+// True for shl, lshr and ashr. Compares opcodes instead of opcode-name strings.
+bool Analysis::checkIfShift(const llvm::BinaryOperator *binOp) {
+    switch (binOp->getOpcode()) {
+        case llvm::Instruction::Shl:
+        case llvm::Instruction::LShr:
+        case llvm::Instruction::AShr:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// True for and, or and xor. Compares opcodes instead of opcode-name strings.
+bool Analysis::checkIfAndOrXor(const llvm::BinaryOperator *binOp) {
+    switch (binOp->getOpcode()) {
+        case llvm::Instruction::And:
+        case llvm::Instruction::Or:
+        case llvm::Instruction::Xor:
+            return true;
+        default:
+            return false;
+    }
+}
+
+// NOTE(review): the body is commented out, so this call currently leaves both flags
+// untouched - confirm whether that is intended.
+void Analysis::reset() {
+//    hasIntInBitwiseBinaryOp = false;
+//    hasStructs = false;
+}
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.h b/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.h
new file mode 100644
index 0000000000..ea25a9b740
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/Analysis.h
@@ -0,0 +1,40 @@
+//
+// Created by solarowl on 4/19/21.
+//
+
+#ifndef THETA_C_FRONTEND_ANALYSIS_H
+#define THETA_C_FRONTEND_ANALYSIS_H
+
+#include "../types/LlvmImports.h"
+#include "llvm/IR/TypeFinder.h"
+// NOTE(review): the header name on the next line was lost in extraction; <string> is an
+// assumption - confirm against the repository.
+#include <string>
+
+// Collects module-wide facts while the LLVM IR is parsed. All state is static.
+class Analysis {
+private:
+    // hasIntInBitwiseBinaryOp:
+    // we need to know, if in Theta we should use bitwise or integer arithmetic
+    // if there are bitwise binary ops (with a few exceptions, see in checkIfIntInBitwiseBinary)
+    // with integer operands, then we have to use bitwise arithmetic
+    // otherwise we can use integer arithmetics which is faster
+    static bool hasIntInBitwiseBinaryOp;
+    static bool checkIfShift(const llvm::BinaryOperator *binOp);
+    static bool checkIfAndOrXor(const llvm::BinaryOperator *binOp);
+    static void checkIfIntInBitwiseBinary(llvm::Instruction *instruction);
+
+    // hasStructs: set by checkIfHasStructs, read through getStructAnalysisResult
+    static bool hasStructs;
+    static void checkIfHasStructs(const llvm::Module*);
+public:
+    static void checkInstruction(llvm::Instruction* instruction); // when parsing, every instruction should be checked with this
+    static void checkModule(const llvm::Module*); // before parsing module, this should be called on it
+    static void reset();
+
+    // Current value of the struct flag.
+    static bool getStructAnalysisResult() {
+        return hasStructs;
+    }
+    // Current value of the bitwise-arithmetic flag.
+    static bool getBitwiseOpAnalysisResult() {
+        return hasIntInBitwiseBinaryOp;
+    }
+};
+
+
+#endif //THETA_C_FRONTEND_ANALYSIS_H
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.cpp b/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.cpp
new file mode 100644
index 0000000000..07a6361bf2
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.cpp
@@ -0,0 +1,199 @@
+//
+// Created by solarowl on 3/30/21.
+//
+
+#include "CPipeline.h"
+#include "../passes/gazer/Passes.h"
+#include "../passes/gazer/UndefToNondet.h"
+#include "../passes/ToposortPass.h"
+#include "../passes/EliminateGepPass.h"
+#include "../passes/BranchDbgCallPass.h"
+#include "../passes/EliminateVariables.h"
+#include "../passes/EliminatePhiNodes.h"
+#include "../passes/TransformHandlesToIntPass.h"
+// NOTE(review): the header name on the next line was lost in extraction; <iostream> is an
+// assumption (this file uses std::cout / std::cerr) - confirm against the repository.
+#include <iostream>
+// #include "../passes/modified/SimplifyCFGModifiedPass.h"
+
+namespace {
+
+// True when the path ends in ".c" or ".i". Paths shorter than two characters are rejected
+// instead of underflowing the substr() offset.
+bool hasCSourceExtension(const std::string &path) {
+    if (path.length() < 2) {
+        return false;
+    }
+    const std::string extension = path.substr(path.length() - 2);
+    return extension == ".c" || extension == ".i";
+}
+
+} // namespace
+
+// Prepares the clang invocation for the given C file; nothing is executed yet.
+// Aborts when the file name does not end in ".c" or ".i".
+// The previous check, !(compare(".c") || compare(".i")), could never be true,
+// so invalid inputs were silently accepted.
+CPipeline::CPipeline(std::string _filename, std::string _clangCLI) {
+    if (!hasCSourceExtension(_filename)) {
+        std::cerr << "Error: Input file should be a .c or .i file!" << std::endl;
+        abort();
+    }
+
+    this->clangCli = _clangCLI;
+    this->filename = _filename;
+    // Replace the two-character extension with ".bc".
+    this->bcFilename = filename.substr(0, filename.length() - 2) + ".bc";
+
+    //std::cout << this->bcFilename << std::endl;
+    // The StringRefs below point into this object's own string members.
+    this->clangArgs = std::vector<llvm::StringRef>{
+            this->clangCli,
+            "-g",
+            "-c",
+            "-O0",
+            "-emit-llvm",
+            "-Xclang", "-disable-O0-optnone",
+            "-o", this->bcFilename,
+            this->filename
+    };
+}
+
+// Runs clang to produce the bitcode file, then parses it into `module`.
+// Aborts the process on any failure.
+void CPipeline::executeClang() {
+    std::string clangErrors;
+
+    int returnCode = llvm::sys::ExecuteAndWait(
+            clangCli,
+            clangArgs,
+            llvm::None,
+            llvm::None,
+            0,
+            0,
+            &clangErrors
+    );
+
+    if (returnCode == -1) {
+        llvm::errs() << "ERROR: failed to execute clang: "
+                     << (clangErrors.empty() ? "Unknown error." : clangErrors) << "\n";
+        abort();
+    }
+
+    if (returnCode != 0) {
+        llvm::errs() << "ERROR: clang exited with a non-zero exit code.\n";
+        abort();
+    }
+
+    llvm::SMDiagnostic error;
+
+    this->module = parseIRFile(bcFilename, error, context);
+
+    if (module == nullptr) {
+        std::cout << "Error while parsing: null module!" << std::endl;
+        abort();
+    }
+}
+
+// Builds and runs the legacy pass pipeline on `module`. The inlining, optimization and
+// cleanup groups are switched by the PassGroupManager flags; the remaining passes always run.
+// Aborts when the module has no "main" function.
+void CPipeline::executeOptimizationPasses() {
+    llvm::legacy::PassManager pm;
+    llvm::initializeAnalysis(*llvm::PassRegistry::getPassRegistry());
+
+    pm.add(createBranchDbgCallPass());
+
+    auto mainPtr = module->getFunction("main");
+    if (mainPtr == nullptr) {
+        std::cerr << "ERROR: entry function (main) not found" << std::endl;
+        abort();
+    }
+
+    if (PassGroupManager::enableInlining) {
+        // NOTE(review): the dyn_cast target types in this lambda were lost in extraction;
+        // llvm::Function and llvm::GlobalVariable are inferred from the variable names
+        // and their use - confirm against the repository.
+        pm.add(llvm::createInternalizePass([this](auto &gv) {
+            if (auto fun = llvm::dyn_cast<llvm::Function>(&gv)) {
+                return module->getFunction("main") == fun;
+            } else if (auto gvar = llvm::dyn_cast<llvm::GlobalVariable>(&gv)) {
+                return true;
+            }
+            return false;
+        }));
+        // For now we hardcode main as the entry function and all as inline level
+        pm.add(gazer::createSimpleInlinerPass(*mainPtr, gazer::InlineLevel::All));
+        pm.add(llvm::createGlobalDCEPass()); // Remove dead functions
+
+
+        // Transform the generated alloca instructions into registers
+        pm.add(llvm::createPromoteMemoryToRegisterPass());
+    }
+
+    if (PassGroupManager::enableOptimizations) {
+        // Start with some metadata-based typed AA
+        pm.add(llvm::createTypeBasedAAWrapperPass());
+        pm.add(llvm::createScopedNoAliasAAWrapperPass());
+        pm.add(llvm::createBasicAAWrapperPass());
+
+        // Split call sites under conditionals
+        pm.add(llvm::createCallSiteSplittingPass());
+
+        // Do some inter-procedural reductions
+        pm.add(llvm::createIPSCCPPass());
+        pm.add(llvm::createGlobalOptimizerPass());
+        pm.add(llvm::createDeadArgEliminationPass());
+
+        // Clean up
+        pm.add(llvm::createInstructionCombiningPass());
+        pm.add(createEliminationPass()); // must be after createInstructionCombiningPass, otherwise weird things happen
+
+        // Note: CFG simplifier can do some problematic stuff, like creating logical binary ops from phi node - icmp combinations
+        /*
+        llvm::SimplifyCFGOptions options = llvm::SimplifyCFGOptions(1, false, false, true, false, nullptr, false, false);
+        llvm::Pass* scfgpass = new llvm::SimplifyCFGPass(options);
+        pm.add(scfgpass);
+        llvm::createCFGSimplificationPass()
+        */
+
+        // pm.add(llvm::createCFGSimplificationPass());
+
+        // pm.add(llvm::createSROAPass());
+        // pm.add(gazer::createPromoteUndefsPass()); // SROA may introduce new undef values, so we run another promote undef pass after it
+
+        // // pm.add(llvm::createPrintModulePass(llvm::outs()));
+        // pm.add(llvm::createEarlyCSEPass());
+
+        // // pm.add(llvm::createCFGSimplificationPass());
+        // pm.add(llvm::createAggressiveInstCombinerPass());
+        // pm.add(llvm::createInstructionCombiningPass());
+
+        // // Try to remove irreducible control flow
+        // pm.add(llvm::createStructurizeCFGPass());
+
+        // // Optimize loops
+        // pm.add(llvm::createLoopInstSimplifyPass());
+        // pm.add(llvm::createLoopSimplifyCFGPass());
+        // pm.add(llvm::createLoopRotatePass());
+        // pm.add(llvm::createLICMPass());
+
+        // // pm.add(llvm::createCFGSimplificationPass());
+        // pm.add(llvm::createInstructionCombiningPass());
+
+        // pm.add(llvm::createIndVarSimplifyPass());
+        // pm.add(llvm::createLoopStrengthReducePass()); // needed by indvarsimplify (see LLVM passes reference manual)
+        // pm.add(llvm::createLoopDeletionPass());
+
+        // pm.add(llvm::createNewGVNPass());
+
+    }
+    pm.add(createEliminateGepPass());
+
+    // Note: CFG simplifier can do some problematic stuff, like creating logical binary ops from phi node - icmp combinations
+    // more problems: we can lose assumption line metadata on this as well
+    // and if used so late, it can merge icmps into a logical or/other op and that probably won't be usable when giving cex
+    // pm.add(llvm::createCFGSimplificationPass()); // simplifies CFG of a function, (good as a clean up?), later/sooner?
+
+    // "cleanup":
+    if (PassGroupManager::enableCleanupPasses) {
+        pm.add(llvm::createInstructionCombiningPass()); // algebraic simplification (does not modify CFG), -instcombine
+        // https://llvm.org/doxygen/classllvm_1_1SimplifyCFGPass.html#details
+
+        pm.add(llvm::createDeadArgEliminationPass()); // -deadargelim
+        pm.add(llvm::createRedundantDbgInstEliminationPass()); // -die (I mean the LLVM flag - not as a threat)
+        pm.add(gazer::createPromoteUndefsPass()); // cleanups can bring undefs
+    }
+
+    pm.add(createToposortPass());
+    // pm.add(createPhiEliminationPass());
+
+    // pm.add(llvm::createDemoteRegisterToMemoryPass());
+    pm.add(gazer::createPromoteUndefsPass()); // cleanups can bring undefs
+
+    pm.add(createTransformHandlesToIntPass());
+    pm.add(llvm::createStripDeadPrototypesPass());
+
+    // FatalErrors = false - it won't kill our process for each error (maybe for some?)
+    // it will print, what it finds to stderr
+    pm.add(llvm::createVerifierPass(false));
+
+    pm.run(*module);
+}
+
+// Runs clang and the pass pipeline, then hands the module over to the caller.
+std::unique_ptr<llvm::Module> CPipeline::processCProgram() {
+    executeClang();
+    executeOptimizationPasses();
+    return std::move(module); // we move this out of here - can be done only once!
+}
\ No newline at end of file
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.h b/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.h
new file mode 100644
index 0000000000..6f872faf12
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/CPipeline.h
@@ -0,0 +1,32 @@
+//
+// Created by solarowl on 3/30/21.
+//
+
+#ifndef JNI_LIBRARY_CPIPELINE_H
+#define JNI_LIBRARY_CPIPELINE_H
+
+#include "../types/Module.h"
+#include "PassGroupManager.h"
+// NOTE(review): the two original standard header names were lost in extraction; <string> and
+// <vector> are assumptions based on the members below. <deque> is new (owned clang arguments).
+#include <deque>
+#include <string>
+#include <vector>
+
+// Compiles a C file to LLVM bitcode with clang and runs the pass pipeline on the result.
+class CPipeline {
+private:
+    // Non-owning views: every entry must refer to a string literal or to a string owned by
+    // this object (the members below or ownedClangArgs).
+    std::vector<llvm::StringRef> clangArgs;
+    // Backing storage for arguments added through addClangArg. A deque keeps references to
+    // existing elements valid when new ones are appended.
+    std::deque<std::string> ownedClangArgs;
+    std::string filename;
+    std::string bcFilename;
+    std::string clangCli;
+    std::unique_ptr<llvm::Module> module = nullptr; // parsed from .bc at the end of executeClang
+
+    void executeClang();
+
+    void executeOptimizationPasses();
+
+public:
+    CPipeline(std::string filename, std::string clangCLI = "clang"); // it will run clang & passes on the C file
+
+    // Appends an extra clang argument. Does not check, if arg was already added!
+    // The string is stored in ownedClangArgs first: pushing a StringRef to the by-value
+    // parameter itself would leave a dangling reference once this function returns.
+    void addClangArg(std::string arg) {
+        ownedClangArgs.push_back(std::move(arg));
+        clangArgs.push_back(ownedClangArgs.back());
+    }
+
+    std::unique_ptr<llvm::Module> processCProgram();
+};
+
+
+#endif //JNI_LIBRARY_CPIPELINE_H
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.cpp b/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.cpp
new file mode 100644
index 0000000000..72c9a22840
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.cpp
@@ -0,0 +1,11 @@
+//
+// Created by solarowl on 3/30/21.
+//
+
+#include "PassGroupManager.h"
+
+// PassGroupManager: definitions of the static switches, all enabled initially
+bool PassGroupManager::enableInlining = true;
+bool PassGroupManager::enableCleanupPasses = true;
+bool PassGroupManager::debugPrintIr = true;
+bool PassGroupManager::enableOptimizations = true;
\ No newline at end of file
diff --git a/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.h b/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.h
new file mode 100644
index 0000000000..645f1ab843
--- /dev/null
+++ b/subprojects/frontends/llvm/src/main/cpp/utilities/PassGroupManager.h
@@ -0,0 +1,18 @@
+//
+// Created by solarowl on 3/30/21.
+//
+
+#ifndef JNI_LIBRARY_PASSGROUPMANAGER_H
+#define JNI_LIBRARY_PASSGROUPMANAGER_H
+
+// Holder for the global on/off switches of the pass groups. It cannot be instantiated
+// (the constructor is deleted); all members are static.
+class PassGroupManager {
+public:
+    static bool enableInlining; // true by default
+    static bool enableOptimizations; // true by default
+    static bool enableCleanupPasses; // true by default
+    static bool debugPrintIr; // true by default
+    PassGroupManager() = delete;
+};
+
+
+#endif //JNI_LIBRARY_PASSGROUPMANAGER_H
diff --git a/subprojects/xcfa/llvm2xcfa/build.gradle.kts b/subprojects/xcfa/llvm2xcfa/build.gradle.kts
new file mode 100644
index 0000000000..594182d75c
--- /dev/null
+++ b/subprojects/xcfa/llvm2xcfa/build.gradle.kts
@@ -0,0 +1,38 @@
+/*
+ *  Copyright 2023 Budapest University of Technology and Economics
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+import org.gradle.internal.os.OperatingSystem
+
+plugins {
+    id("java-common")
+    id("kotlin-common")
+}
+
+dependencies {
+    implementation(project(":theta-common"))
+    implementation(project(":theta-core"))
+    implementation(project(":theta-xcfa"))
+}
+
+// On Linux only: build the :theta-llvm project before the tests and point
+// java.library.path at the directory of its linked shared library.
+tasks.test {
+    if (OperatingSystem.current().isLinux) {
+        val nativeLibTasks = project(":theta-llvm").tasks
+        dependsOn(nativeLibTasks.build)
+
+        // Takes the first LinkSharedLibrary task of :theta-llvm.
+        val linkTask = nativeLibTasks.withType(LinkSharedLibrary::class).first()
+        systemProperty("java.library.path", linkTask.linkedFile.get().asFile.parent)
+    }
+}
diff --git a/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/ArithmeticType.java b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/ArithmeticType.java
new file mode 100644
index 0000000000..681ee1f918
--- /dev/null
+++ b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/ArithmeticType.java
@@ -0,0 +1,24 @@
+/*
+ *  Copyright 2023 Budapest University of Technology and Economics
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package hu.bme.mit.theta.llvm2xcfa;
+
+/**
+ * Selects the arithmetic used for the generated model.
+ * NOTE(review): the exact meaning of {@code efficient} is not visible here; presumably it
+ * picks between the other two automatically - confirm against the users of this enum.
+ */
+public enum ArithmeticType {
+    integer,
+    // integer_modulo,
+    bitvector,
+    efficient
+}
\ No newline at end of file
diff --git a/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmIrProvider.java b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmIrProvider.java
new file mode 100644
index 0000000000..f08788911f
--- /dev/null
+++ b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmIrProvider.java
@@ -0,0 +1,227 @@
+/*
+ *  Copyright 2023 Budapest University of Technology and Economics
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package hu.bme.mit.theta.llvm2xcfa;
+
+import hu.bme.mit.theta.common.Tuple2;
+import hu.bme.mit.theta.common.Tuple3;
+import hu.bme.mit.theta.common.Tuple4;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * {@link SSAProvider} backed by the native "theta-llvm" library. All program data is
+ * fetched through the JNI methods declared below.
+ */
+public class LlvmIrProvider implements SSAProvider {
+    static {
+        System.loadLibrary("theta-llvm");
+    }
+
+    // key: (function name, basic block name), value: index of the function in the module
+    private final Map<Tuple2<String, String>, Integer> bbNamefuncIndexLut;
+
+    /**
+     * Parses the given file with every pass group and the debug IR print enabled.
+     *
+     * @param irFilename file name handed to the native parser
+     */
+    public LlvmIrProvider(String irFilename) {
+        this(irFilename, true, true, true, true);
+    }
+
+    /**
+     * Parses the given file. Each flag passed as {@code false} disables the matching
+     * native feature before parsing; {@code true} leaves the native setting untouched.
+     *
+     * @param irFilename   file name handed to the native parser
+     * @param inlining     {@code false} disables inlining
+     * @param cleanup      {@code false} disables the cleanup passes
+     * @param optimization {@code false} disables the optimization passes
+     * @param debugPrintIr {@code false} disables the debug IR print
+     */
+    public LlvmIrProvider(String irFilename, Boolean inlining, Boolean cleanup, Boolean optimization, Boolean debugPrintIr) {
+        if (!inlining) {
+            JniDisableInlining();
+        }
+        if (!cleanup) {
+            JniDisableCleanupPasses();
+        }
+        if (!optimization) {
+            JniDisableOptimizationPasses();
+        }
+        if (!debugPrintIr) {
+            JniDisablePrintDebugIr();
+        }
+
+        JniParseIr(irFilename);
+        bbNamefuncIndexLut = new HashMap<>();
+
+        // Index every basic block by (function name, block name) for getInstructions.
+        int numOfFunctions = JniGetFunctionsNum();
+
+        for (int f = 0; f < numOfFunctions; f++) {
+            String functionName = JniGetFunctionName(f);
+            int numOfBasicBlocks = JniGetNumOfBasicBlocks(f);
+
+            for (int b = 0; b < numOfBasicBlocks; b++) {
+                bbNamefuncIndexLut.put(Tuple2.of(functionName, JniGetBlockName(f, b)), f);
+            }
+        }
+
+    }
+
+    private native void JniParseIr(String irFilename);
+
+    private native void JniDisableInlining();
+
+    private native void JniDisableOptimizationPasses();
+
+    private native void JniDisableCleanupPasses();
+
+    private native void JniDisablePrintDebugIr();
+
+    private native int JniGetGlobalVariablesNum();
+
+    private native String JniGetGlobalVariableName(int gvIndex);
+
+    private native String JniGetGlobalVariableType(int gvIndex);
+
+    private native String JniGetGlobalVariableValue(int gvIndex);
+
+    /**
+     * Lists the global variables of the module.
+     *
+     * @return one (name, type, value) tuple per global variable
+     */
+    @Override
+    public Collection<Tuple3<String, String, String>> getGlobalVariables() {
+        int numOfGlobalVar = JniGetGlobalVariablesNum();
+        List<Tuple3<String, String, String>> globalVarList = new ArrayList<>();
+
+        for (int i = 0; i < numOfGlobalVar; i++) {
+            globalVarList.add(Tuple3.of(
+                    JniGetGlobalVariableName(i),
+                    JniGetGlobalVariableType(i),
+                    JniGetGlobalVariableValue(i)
+            ));
+        }
+        return globalVarList;
+    }
+
+    private native int JniGetFunctionsNum();
+
+    private native String JniGetFunctionRetType(int funcIndex);
+
+    private native String JniGetFunctionName(int funcIndex);
+
+    private native int JniGetNumOfFunctionParameters(int funcIndex);
+
+    private native String JniGetParameterType(int funcIndex, int paramIndex);
+
+    private native String JniGetParameterName(int funcIndex, int paramIndex);
+
+    /**
+     * Lists the functions of the module.
+     *
+     * @return one (name, return type, parameters) tuple per function; the return type is
+     *         empty when the native side reports "void", and each parameter is a
+     *         (type, name) tuple
+     */
+    @Override
+    public Collection<Tuple3<String, Optional<String>, List<Tuple2<String, String>>>> getFunctions() {
+        int numOfFunctions = JniGetFunctionsNum();
+        List<Tuple3<String, Optional<String>, List<Tuple2<String, String>>>> functions = new ArrayList<>();
+
+        for (int f = 0; f < numOfFunctions; f++) {
+            String functionName = JniGetFunctionName(f);
+            String retType = JniGetFunctionRetType(f); // TODO make this really optional? (->when void)
+            int numOfParams = JniGetNumOfFunctionParameters(f);
+
+            List<Tuple2<String, String>> parameters = new ArrayList<>();
+            for (int p = 0; p < numOfParams; p++) {
+                String paramType = JniGetParameterType(f, p);
+                String paramName = JniGetParameterName(f, p);
+                parameters.add(Tuple2.of(paramType, paramName));
+            }
+            if (retType.equals("void")) {
+                functions.add(Tuple3.of(functionName, Optional.empty(), parameters));
+            } else {
+                functions.add(Tuple3.of(functionName, Optional.of(retType), parameters));
+            }
+        }
+        return functions;
+    }
+
+    private native int JniGetNumOfBasicBlocks(int funcIndex);
+
+    private native int JniGetFunctionIndex(String funcName);
+
+    private native String JniGetBlockName(int funcIndex, int BasicBlockIndex);
+
+    /**
+     * Lists the basic block names of a function, in native index order.
+     *
+     * @param funcName name of the function, resolved by the native side
+     * @return the block names
+     */
+    @Override
+    public List<String> getBlocks(String funcName) {
+        int f = JniGetFunctionIndex(funcName);
+        int numOfBasicBlocks = JniGetNumOfBasicBlocks(f);
+        List<String> blocks = new ArrayList<>();
+        for (int b = 0; b < numOfBasicBlocks; b++) {
+            blocks.add(JniGetBlockName(f, b));
+        }
+        return blocks;
+    }
+
+    private native int JniGetBlockIndex(int functionIndex, String blockName);
+
+    private native int JniGetNumOfInstructions(int functionIndex, int basicBlockIndex);
+
+    private native int JniGetInstructionLineNumber(int functionIndex, int basicBlockIndex, int i);
+
+    private native String JniGetInstructionOpcode(int functionIndex, int basicBlockIndex, int i);
+
+    private native String JniGetInstructionRetType(int functionIndex, int basicBlockIndex, int i);
+
+    private native String JniGetInstructionRetName(int functionIndex, int basicBlockIndex, int i);
+
+    private native int JniGetInstructionNumOfOperands(int functionIndex, int basicBlockIndex, int i);
+
+    private native String JniGetInstructionOperandVarType(int functionIndex, int basicBlockIndex, int i, int o);
+
+    private native String JniGetInstructionOperandVarName(int functionIndex, int basicBlockIndex, int i, int o);
+
+    /**
+     * Lists the instructions of a basic block.
+     *
+     * @param funcName  name of the enclosing function
+     * @param blockName name of the basic block
+     * @return one (opcode, result, operands, line number) tuple per instruction; the result
+     *         is a (type, name) tuple and is empty when the native side reports an empty
+     *         result type; each operand is a (type, name) tuple whose type is empty when
+     *         the native side reports "constant"
+     * @throws IllegalArgumentException if the block was not seen while parsing
+     */
+    @Override
+    public List<Tuple4<String, Optional<Tuple2<String, String>>, List<Tuple2<Optional<String>, String>>, Integer>> getInstructions(String funcName, String blockName) {
+        // Look up as Integer: unboxing a missing entry directly would throw a bare NullPointerException.
+        Integer knownFunctionIndex = bbNamefuncIndexLut.get(Tuple2.of(funcName, blockName));
+        if (knownFunctionIndex == null) {
+            throw new IllegalArgumentException(
+                    "Unknown basic block '" + blockName + "' in function '" + funcName + "'");
+        }
+        int functionIndex = knownFunctionIndex;
+        int basicBlockIndex = JniGetBlockIndex(functionIndex, blockName);
+        int numOfInstructions = JniGetNumOfInstructions(functionIndex, basicBlockIndex);
+
+        List<Tuple4<String, Optional<Tuple2<String, String>>, List<Tuple2<Optional<String>, String>>, Integer>> instructions = new ArrayList<>();
+        for (int i = 0; i < numOfInstructions; i++) {
+            int lineNumber = JniGetInstructionLineNumber(functionIndex, basicBlockIndex, i);
+            String opcode = JniGetInstructionOpcode(functionIndex, basicBlockIndex, i);
+            String retType = JniGetInstructionRetType(functionIndex, basicBlockIndex, i);
+            String retVar = JniGetInstructionRetName(functionIndex, basicBlockIndex, i);
+            int numOfOperands = JniGetInstructionNumOfOperands(functionIndex, basicBlockIndex, i);
+            List<Tuple2<Optional<String>, String>> instructionOperands = new ArrayList<>();
+            for (int o = 0; o < numOfOperands; o++) {
+                String varType = JniGetInstructionOperandVarType(functionIndex, basicBlockIndex, i, o);
+                String varName = JniGetInstructionOperandVarName(functionIndex, basicBlockIndex, i, o);
+                if (varType.equals("constant")) {
+                    instructionOperands.add(Tuple2.of(Optional.empty(), varName));
+                } else {
+                    instructionOperands.add(Tuple2.of(Optional.of(varType), varName));
+                }
+            }
+
+            if (retType.equals("")) {
+                instructions.add(Tuple4.of(opcode, Optional.empty(), instructionOperands, lineNumber));
+            } else {
+                instructions.add(Tuple4.of(opcode, Optional.of(Tuple2.of(retType, retVar)), instructionOperands, lineNumber));
+            }
+        }
+
+        return instructions;
+    }
+
+    private native boolean JniGetStructAnalysisResult();
+
+    /** Returns the struct analysis result reported by the native side. */
+    @Override
+    public boolean hasStructs() {
+        return JniGetStructAnalysisResult();
+    }
+
+    private native boolean JniGetBitwiseArithmeticAnalysisResult();
+
+    /** Returns the bitwise arithmetic analysis result reported by the native side. */
+    @Override
+    public boolean shouldUseBitwiseArithmetics() {
+        return JniGetBitwiseArithmeticAnalysisResult();
+    }
+
+}
\ No newline at end of file
diff --git
a/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmMetadata.java b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmMetadata.java
new file mode 100644
index 0000000000..b8a3043eaa
--- /dev/null
+++ b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/LlvmMetadata.java
@@ -0,0 +1,27 @@
+/*
+ *  Copyright 2023 Budapest University of Technology and Economics
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package hu.bme.mit.theta.llvm2xcfa;
+
+import hu.bme.mit.theta.xcfa.model.MetaData;
+
+/** Metadata that records the line number attached to an LLVM instruction. */
+public class LlvmMetadata extends MetaData {
+    private final int lineNumber;
+
+    /**
+     * @param lineNumber the line number to record
+     */
+    public LlvmMetadata(int lineNumber) {
+        this.lineNumber = lineNumber;
+    }
+
+    /**
+     * Returns the recorded line number; without this accessor the stored value could never be read.
+     *
+     * @return the line number passed to the constructor
+     */
+    public int getLineNumber() {
+        return lineNumber;
+    }
+}
diff --git a/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/Readme.md b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/Readme.md
new file mode 100644
index 0000000000..0f6977101a
--- /dev/null
+++ b/subprojects/xcfa/llvm2xcfa/src/main/java/hu/bme/mit/theta/llvm2xcfa/Readme.md
@@ -0,0 +1,113 @@
+# LLVM Instruction mapping
+
+These tables detail the currently supported LLVM instructions and their corresponding model elements. Note that a
+dash (`-`) denotes an instruction that is recognized but _won't_ cause any alteration to the model (because it is
+out-of-scope), while a `TODO` label means that it currently produces an error to use the instruction, but should be
+handled accordingly.
+ +## Terminator Instructions + +| LLVM Instructions | Handled versions | Resulting model element(s)| +| --- | --- | --- | +|`ret` | 1. `ret`
2. `ret ` | 1. Edge to final location
2. Assignment to return variable, edge to final location +|`br` | 1. `br