diff --git a/benchmarks/turnt.toml b/benchmarks/turnt.toml
index e8a9db124..a6d4eb695 100644
--- a/benchmarks/turnt.toml
+++ b/benchmarks/turnt.toml
@@ -39,3 +39,8 @@ hyperfine --warmup 3 --export-json bench.json \
     rm -f {base}.json
     rm {base}"""
 output."bench.json" = "../bench.json"
+
+[envs.fastbrili]
+default = false
+command = "bril2json < {filename} | ../fastbril/build/fastbrili {args}"
+output.out = "-"
diff --git a/fastbril/.gitignore b/fastbril/.gitignore
new file mode 100644
index 000000000..e82120eb1
--- /dev/null
+++ b/fastbril/.gitignore
@@ -0,0 +1,4 @@
+build/
+*~
+\#*\#
+*.o
\ No newline at end of file
diff --git a/fastbril/Makefile b/fastbril/Makefile
new file mode 100644
index 000000000..1635e59e9
--- /dev/null
+++ b/fastbril/Makefile
@@ -0,0 +1,84 @@
+TARGET_EXEC ?= fastbrili
+
+BUILD_DIR ?= ./build
+SRC_DIRS ?= ./src
+DOC_DIR ?= ./doc
+SHELL = /bin/sh
+
+CONFIGS := $(wildcard config/*.cf)
+GEN_HEAD := $(CONFIGS:config/%.cf=src/%.h)
+GEN_TEX := $(CONFIGS:config/%.cf=doc/%.tex)
+
+SRCS := $(wildcard $(SRC_DIRS)/*.c $(SRC_DIRS)/**/*.c)
+
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+DEPS := $(OBJS:.o=.d)
+
+INC_DIRS := $(shell find $(SRC_DIRS) -type d)
+INC_FLAGS := $(addprefix -I,$(INC_DIRS))
+# recipes redirect straight into $@; never leave a half-written target behind
+.DELETE_ON_ERROR:
+CFLAGS += $(INC_FLAGS)
+
+.PHONY: debug
+
+debug: CFLAGS += -g -Og
+debug: $(BUILD_DIR)/$(TARGET_EXEC)
+
+.PHONY: coverage
+# mkdir -p: the build directory may already exist from an earlier build
+coverage: CFLAGS += --coverage -DDEBUG -g3
+coverage: $(GEN_HEAD)
+	mkdir -p $(BUILD_DIR) && cd $(BUILD_DIR) && $(CC) $(CFLAGS) $(abspath $(SRCS)) -o $(TARGET_EXEC)
+
+.PHONY: cov-report
+
+cov-report:
+	gcovr -r . --html --html-details -o cov-report/out.html
+
+.PHONY: release
+
+release: CFLAGS += -O3
+release: $(BUILD_DIR)/$(TARGET_EXEC)
+# NOTE(review): lib/lib.o is a prerequisite but is not on the link line -- confirm that is intended
+$(BUILD_DIR)/$(TARGET_EXEC): $(OBJS) lib/lib.o
+	$(CC) $(OBJS) -o $@ $(LDFLAGS)
+
+# # assembly
+# $(BUILD_DIR)/%.s.o: %.s
+# 	$(MKDIR_P) $(dir $@)
+# 	$(AS) $(ASFLAGS) -c $< -o $@
+
+# c source (-MMD -MP writes the .d files that `-include $(DEPS)` reads back)
+$(BUILD_DIR)/%.c.o: %.c | $(GEN_HEAD)
+	$(MKDIR_P) $(dir $@)
+	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@
+
+# configured header files
+src/%.h: config/%.cf srcgen.awk
+	./srcgen.awk < $< > $@
+# the recipe writes $(DOC_DIR)/brb.pdf, so that file is the real target
+$(DOC_DIR)/brb.pdf: $(GEN_TEX) $(DOC_DIR)/main.tex
+	cd $(DOC_DIR) && latex -output-format=pdf main.tex && mv main.pdf brb.pdf
+
+lib/lib.o: lib/lib.c
+	cd lib && $(CC) -O3 -c lib.c
+# `doc` and the old `brb.pdf` goal are phony aliases for the generated PDF
+doc brb.pdf: $(DOC_DIR)/brb.pdf
+# generated opcode/type tables, one per config file (mirrors the src/%.h rule)
+doc/%.tex: config/%.cf docgen.sh docgen.awk
+	$(MKDIR_P) $(dir $@)
+	./docgen.sh $@
+
+.PHONY: clean doc brb.pdf
+
+clean:
+	find . -name "*.aux" -o -name "*.log" -o -name "*.pdf" -o -name "*~" -o \
+	 -name "*.gcda" -o -name "*.gcno" -o -name "*.o" | xargs rm || true
+	$(RM) $(GEN_HEAD)
+	$(RM) $(GEN_TEX)
+	$(RM) -r $(BUILD_DIR)
+
+-include $(DEPS)
+
+MKDIR_P ?= mkdir -p
diff --git a/fastbril/README.md b/fastbril/README.md
new file mode 100644
index 000000000..bd3fff43b
--- /dev/null
+++ b/fastbril/README.md
@@ -0,0 +1,31 @@
+# The `fastbril` bytecode interpreter
+
+This is a bytecode spec/generator/interpreter for `bril`. It aims to
+be like the typescript or rust implementation, but faster.
+
+## To build
+
++ binary: `make release`
+  the binary will be `./build/fastbrili`
++ doc: you need to have LaTeX installed. run `make doc`
+  the doc will be `./doc/brb.pdf`
+  there is a prebuilt pdf in case this is difficult
+
+
+
+### Features
+We support a superset of the behavior provided by `brili`, so options like `-p`
+work exactly the same. 
We also support the following: + - `-b` will read in bytecode instead of the Bril json + - `-bo <file>` will output the bytecode to `<file>` + - `-pr` will print the program to standard out (probably more useful with the + `-b` option) + - `-ni` will NOT run the interpreter. + - `-e <file>` will emit assembly to `<file>`. + +currently the only supported assembly is armv8. sorry. + +the compiler to asm is probably the least trustworthy part of this +whole project. The general interpreter should be pretty good, but it +is always possible there are bugs. Please report bugs to +`cs897@cornell.edu`, and there's a chance that i will fix them :) diff --git a/fastbril/bytecode.txt b/fastbril/bytecode.txt new file mode 100644 index 000000000..e1e825bf9 --- /dev/null +++ b/fastbril/bytecode.txt @@ -0,0 +1,49 @@ +Instructions are 64bit numbers +1 bit to mark labelled, 15 bits of opcode, 16bits of destination, 16 bits of +arg1, 16 bits of arg2 + +this is modified for other instructions + +Opcodes: +nop: 0 +const: 1 +add: 2 +mul: 3 +sub: 4 +div: 5 +eq: 6 +lt: 7 +gt: 8 +le: 9 +ge: 10 +not: 11 +and: 12 +or: 13 +jmp: 14 +br: 15 +call: 16 +ret: 17 +print: 18 +phi: 19 +alloc: 20 +free: 21 +store: 22 +load: 23 +ptradd: 24 +fadd: 25 +fmul: 26 +fsub: 27 +fdiv: 28 +feq: 29 +flt: 30 +fle: 31 +fgt: 32 +fge: 33 +lconst: 34 + + +types: +bool: 0 +int: 1 +float: 2 +ptr: 3 diff --git a/fastbril/config/base.cf b/fastbril/config/base.cf new file mode 100644 index 000000000..6c4d227e4 --- /dev/null +++ b/fastbril/config/base.cf @@ -0,0 +1,22 @@ +CONST 1 +ADD 2 +MUL 3 +SUB 4 +DIV 5 +EQ 6 +LT 7 +GT 8 +LE 9 +GE 10 +NOT 11 +AND 12 +OR 13 +JMP 14 +BR 15 +CALL 16 +RET 17 +PRINT 18 +LCONST 19 +NOP 20 +ID 21 + diff --git a/fastbril/config/float.cf b/fastbril/config/float.cf new file mode 100644 index 000000000..676ab6809 --- /dev/null +++ b/fastbril/config/float.cf @@ -0,0 +1,9 @@ +FADD 28 +FMUL 29 +FSUB 30 +FDIV 31 +FEQ 32 +FLT 33 +FLE 34 +FGT 35 +FGE 36 diff --git a/fastbril/config/mem.cf b/fastbril/config/mem.cf new file mode 
100644 index 000000000..a53d9df92 --- /dev/null +++ b/fastbril/config/mem.cf @@ -0,0 +1,5 @@ +ALLOC 23 +FREE 24 +STORE 25 +LOAD 26 +PTRADD 27 diff --git a/fastbril/config/ssa.cf b/fastbril/config/ssa.cf new file mode 100644 index 000000000..6ce3c44af --- /dev/null +++ b/fastbril/config/ssa.cf @@ -0,0 +1,2 @@ +PHI 22 + diff --git a/fastbril/config/types.cf b/fastbril/config/types.cf new file mode 100644 index 000000000..9099d5ed6 --- /dev/null +++ b/fastbril/config/types.cf @@ -0,0 +1,4 @@ +BRILINT 0 +BRILBOOL 1 +BRILFLOAT 2 +BRILVOID 3 diff --git a/fastbril/doc/.gitignore b/fastbril/doc/.gitignore new file mode 100644 index 000000000..a6e642c80 --- /dev/null +++ b/fastbril/doc/.gitignore @@ -0,0 +1,9 @@ +*.aux +*.out +*.log +base.tex +float.tex +mem.tex +ssa.tex +types.tex +brb.pdf diff --git a/fastbril/doc/auto/main.el b/fastbril/doc/auto/main.el new file mode 100644 index 000000000..2a239c5fb --- /dev/null +++ b/fastbril/doc/auto/main.el @@ -0,0 +1,34 @@ +(TeX-add-style-hook + "main" + (lambda () + (TeX-add-to-alist 'LaTeX-provided-package-options + '(("appendix" "toc" "page") ("geometry" "margin=3cm"))) + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "href") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperref") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperimage") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "hyperbaseurl") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "nolinkurl") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "url") + (add-to-list 'LaTeX-verbatim-macros-with-braces-local "path") + (add-to-list 'LaTeX-verbatim-macros-with-delims-local "path") + (TeX-run-style-hooks + "latex2e" + "base" + "ssa" + "mem" + "float" + "types" + "article" + "art10" + "appendix" + "hyperref" + "geometry") + (TeX-add-symbols + "bril" + "Bril" + "refint") + (LaTeX-add-labels + "app:opcodes" + "app:types")) + :latex) + diff --git a/fastbril/doc/main.tex b/fastbril/doc/main.tex new file mode 100644 index 
000000000..557e1eaf1 --- /dev/null +++ b/fastbril/doc/main.tex @@ -0,0 +1,239 @@ + +\documentclass{article} + +\usepackage[toc,page]{appendix} +\usepackage{hyperref} +\usepackage[margin=3cm]{geometry} + +\newcommand{\bril}{\href{https://capra.cs.cornell.edu/bril/}{bril}} +\newcommand{\Bril}{\href{https://capra.cs.cornell.edu/bril/}{Bril}} +\newcommand{\refint}{\href{https://capra.cs.cornell.edu/bril/tools/interp.html} + {reference interpreter}} + +\title{BRB Specification \\ +\Large{Big Red Bytecode}} +\author{Susan Garry, Charles Sherk} + + +\begin{document} +\maketitle +\section{Overview} +We provide a description of a bytecode for \bril{}. We support the standard +extensions, and make it relatively easy to extend the bytecode to support new +extensions as well. We also provide a reference for the opcodes that we have +chosen in appendix \ref{app:opcodes}. The general encoding is that each +instruction is 64 bits, but some instructions require more information. These +instructions encode in some multiple of 64 bits which can be determined by +reading the first 64 bits. +\section{Base Instructions} +Temporaries (stack variables) are represented by a number in the range +$[0, 2^{16})$. This does technically impose a limit over the original \bril{} +implementation, but we don't believe that it will be relevant for any +programs. Since all data types take up 8 bytes in our implementation, this comes +out to half a megabyte of space maximum for each stack frame. In java, the +maximum stack size is 1 megabyte, so this seems like a reasonable limitation. + +% TODO alternate encoding for this? +We also limit the number of instructions to $2^{16}$ per function so that labels +can be represented in 16 bits as instruction indices. 
This allows us to use the +following general instruction format:\\ +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{arg1} + & \texttt{arg2} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} +The \texttt{labelled} bit marks if an instruction has a label immediately preceding +it, that is, it can be a jump target. This is important for faithful execution +of the $\phi$ instruction, as it needs to keep track of the most recent label. + +The \texttt{opcode} is 15 bits, which is far bigger than we need, but it makes +sense to keep everything in 16 bit increments. There is space for another couple +bits of tagging extensions after the label bit since we don't use many opcode +bits, but there is no need for them yet. + +Next is the \texttt{dest} temporary. This is where the result of this +instruction will be put. \texttt{arg1} and \texttt{arg2} are the two +arguments. This works for all instructions of the form +\[ + \texttt{dest} := \texttt{arg1}\ op\ \texttt{arg2} +\] +There are however other formats.\footnote{Not EVERY instruction uses the second + argument, such as the \texttt{not} instruction, so in this case the second + argument is undefined.} +\section{Types} +\label{sec:types} +We can represent the \bril{} types \texttt{int}, \texttt{float}, \texttt{bool}, +\texttt{void}, and arbitrarily nested pointers to these. The 2 bit encodings for +base types are in appendix \ref{app:types}. +\begin{center} + \begin{tabular}{r|c|c|} + \cline{2-3} + & \texttt{ptr depth} & \texttt{base type}\\ \cline{2-3} + Size(bits) & 14 & 2 + \end{tabular} +\end{center} +This means something like \texttt{ptr<ptr<ptr<bool>>>} would be represented as +\texttt{0b1101}. The right two bits are the base type, and the rest to the left +is how many \texttt{ptr} there are. 
+ +\section{ID instruction} +The \texttt{ID} instruction only has one argument and a destination, leaving us +16 bits unused. However, in order to facilitate translation back to the original +\bril{} code, we need to be able to write down types, and \texttt{ID} is a +polymorphic instruction, so we hold the type in this last slot, giving this +layout: +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{arg} + & \texttt{type} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} + +\section{Branch Instructions} +Branches are the simplest instructions that don't fit into the ``base'' format, +and are laid out as follows:\\ +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{test} & \texttt{ltrue} + & \texttt{lfalse} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} +The \texttt{labelled} and \texttt{opcode} sections work the same way as base +instructions (the opcode will always be the branch opcode, but we still need it +to be able to decode easily). \texttt{test} is a temporary which if it is 1 will +transfer control flow to \texttt{ltrue}, and if it is 0 will transfer to +\texttt{lfalse}. Other values result in undefined behavior, as we only allow +booleans to be 0 or 1. +\section{Constant Instruction} +Constants present a bit of an issue: \bril{} supports constants up to 64 bits, +but that would be the whole instruction. We also observe that usually constants +are much smaller, so we provide two encodings. 
First of all is the ``standard'' +encoding:\\ +\begin{center} + \begin{tabular}{r|c|c|c|c|} + \cline{2-5} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{value}\\ + \cline{2-5} + Size (bits): & 1 & 15 & 16 & 32 \\ \cline{2-5} + \end{tabular} +\end{center} +This works for integers which can be represented in 32 bits as well as booleans, +but for larger integers and floats, there is a loss of precision. We also +provide a ``long constant'' format as follows: +\begin{center} + \begin{tabular}{r|c|c|c|c|c|c|} + \cline{2-7} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{type} + & \texttt{unused} + & \texttt{value} \\ \cline{2-7} + Size (bits): & 1 & 15 & 16 & 16 & 16 & 64 \\ \cline{2-7} + \end{tabular} +\end{center} +This is 128 bits instead of 64, as we use \texttt{labelled}, \texttt{opcode}, +and \texttt{dest} +the same way. +We also include the type since we have the space, and this is necessary to +translate back to \bril{}. +The next 64 bits are the constant. This opcode is distinct +from the opcode for the ``standard'' constant instruction encoding, so it allows +distinction. +\section{Function Call Instruction} +\Bril{} supports function calls in a single instruction with an unlimited number +of arguments, which is obviously a problem for a 64 bit encoding. We provide an +initial encoding as follows: +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{num\_args} + & \texttt{target} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} +\texttt{labelled}, \texttt{opcode}, and \texttt{dest} work as in the standard +encoding. \texttt{num\_args} tells us the number of arguments to the function, so +we know how many of the following 64 bit chunks contain them, and +\texttt{target} is the function to call. 
Functions are represented as their +indices in the order they appear in the source, which makes linking multiple sources +together impossible at the bytecode stage. For this reason, the outputted file +includes the name of the function, so that it is possible to reparse and link +multiple files. + +We then have a sequence of 64 bit words that are split into 4 arguments, not all +of which are used. These are represented as temps. + +\section{Print Instruction} +\Bril{} also includes a fairly high level \texttt{print} instruction, which +deals with its arguments differently depending on their types. In order to +encode this, we need to have an encoding for the types supported by \bril{}, +which we provide in section \ref{sec:types}. The first word of a \texttt{print} +is encoded as follows: +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{num\_prints} & \texttt{type1} + & \texttt{arg1} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} +As usual, \texttt{labelled} and \texttt{opcode} work as +above. \texttt{num\_prints} is the number of arguments to the \texttt{print} +instruction, as \bril{} supports an arbitrary amount. \texttt{type1} is the type +of the first argument, and \texttt{arg1} is the encoding of the +temporary. Subsequent arguments come in a sequence of 64 bit chunks which hold +up to two arguments each, alternating the type and then the temp. +\section{$\Phi$ Instruction} +In order to properly mimic the behavior of the \refint, we need to be able to +faithfully execute the $\Phi$ instruction on SSA programs. This is the reason we +have the bit to keep track of which instructions were labelled in the +source. The $\Phi$ instruction also supports an arbitrary number of arguments, +so we use multiple 64 bit words to encode it. 
The first is as follows: +\begin{center} + \begin{tabular}{r|c|c|c|c|c|} + \cline{2-6} + & \texttt{labelled} & \texttt{opcode} & \texttt{dest} & \texttt{num\_choices} + & \texttt{unused} \\ \cline{2-6} + Size (bits): & 1 & 15 & 16 & 16 & 16 \\ \cline{2-6} + \end{tabular} +\end{center} +\texttt{labelled}, \texttt{opcode}, and \texttt{dest} work the same as a standard +instruction, and \texttt{num\_choices} is the number of places we might come +from into the $\Phi$ instruction. Following this instruction are a sufficient +number of 64 bit words to hold all the choices, which are represented as the +encoding of a label followed by the value, which is a temp. Remember labels are +indices into the instruction list. +\begin{appendices} +\section{Opcodes} +\label{app:opcodes} +\begin{tabular}{cccc} + \begin{minipage}{.25\linewidth} + \include{base.tex} + \end{minipage} + & + \begin{minipage}{.25\linewidth} + \include{ssa.tex} + \end{minipage} + & + \begin{minipage}{.25\linewidth} + \include{mem.tex} + \end{minipage} + & + \begin{minipage}{.25\linewidth} + \include{float.tex} + \end{minipage} +\end{tabular} + \begin{minipage}{\linewidth} + \section{Types} + \label{app:types} + \begin{center} + \include{types.tex} + \end{center} + \end{minipage} +\end{appendices} +\end{document} diff --git a/fastbril/docgen.awk b/fastbril/docgen.awk new file mode 100755 index 000000000..b6bce5e47 --- /dev/null +++ b/fastbril/docgen.awk @@ -0,0 +1,10 @@ +BEGIN { + printf "\\begin{tabular}{|r|l|}\\hline\n\\multicolumn{2}{|c|}{%s}\\\\ \\hline\n", header +} +{ + if($0 != "") + print $1 " & " $2 " \\\\ \\hline" +} +END { + print "\\end{tabular}" +} diff --git a/fastbril/docgen.sh b/fastbril/docgen.sh new file mode 100755 index 000000000..986c97c6b --- /dev/null +++ b/fastbril/docgen.sh @@ -0,0 +1,11 @@ +#!/bin/bash + + +CORE=$(echo $1 | awk -F'[/.]' '{print $2}') +IF=$(echo $1 | awk -F'[/.]' '{printf "config/%s.cf", $2}') +if [[ $CORE == "types" ]]; then + CORE="Types" +else + CORE="$CORE 
Instructions" +fi +awk -v header="$CORE" -f docgen.awk $IF > $1 diff --git a/fastbril/lib/lib.c b/fastbril/lib/lib.c new file mode 100644 index 000000000..8447a5165 --- /dev/null +++ b/fastbril/lib/lib.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include + +void priint(int64_t i) +{ + printf("%ld", i); +} + +void pribool(int i) +{ + printf("%s", i ? "true" : "false"); +} + +void prifloat(double d) +{ + printf("%.17g", d); +} + + +int64_t int_of_string(const char *str) +{ + return strtol(str, 0, 0); +} + +int bool_of_string(const char *str) +{ + if(strcmp(str, "true") == 0) return 1; + else if(strcmp(str, "false") == 0) return 0; + fprintf(stderr, "%s should be a boolean!\n", str); + exit(1); +} + +double float_of_string(const char *str) +{ + return strtod(str, 0); +} diff --git a/fastbril/src/.gitignore b/fastbril/src/.gitignore new file mode 100644 index 000000000..8f192d813 --- /dev/null +++ b/fastbril/src/.gitignore @@ -0,0 +1,5 @@ +base.h +float.h +mem.h +ssa.h +types.h diff --git a/fastbril/src/asm/armv8.h b/fastbril/src/asm/armv8.h new file mode 100644 index 000000000..fadea93d6 --- /dev/null +++ b/fastbril/src/asm/armv8.h @@ -0,0 +1,151 @@ +#ifndef ARMV8_H +#define ARMV8_H +#include +#include +#include + +/** + * "normal" operations in armv8 (prefixed w/ A for "arm") + */ +typedef enum norm_arm_op + { + AADD, AMUL, ASUB, AAND, AORR, ASDIV, ALSL, AFADD, AFMUL, AFSUB, AFDIV, + } norm_arm_op_t; + +typedef enum arm_reg + { + SP, X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, + X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, + XZR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, + D31, + } arm_reg_t; + +typedef enum arm_cmp_flags + { + CMPEQ, CMPMI, CMPLS, CMPGT, CMPGE, CMPLT, CMPLE, + } arm_cmp_flags_t; + +typedef enum arm_arg_tp {REG, CNST, TMP} arm_arg_tp_t; + +typedef union arm_arg +{ + arm_reg_t reg; + 
int32_t cnst; + uint16_t tmp; +} arm_arg_t; + +typedef struct arm_arg_tagged +{ + arm_arg_tp_t type; + arm_arg_t value; +} arm_arg_tagged_t; + +typedef struct norm_arm_insn +{ + norm_arm_op_t op; + arm_arg_tagged_t dest, a1, a2; + uint8_t lsl; +} norm_arm_insn_t; + +typedef struct cmp_arm_insn +{ + bool is_float; + arm_arg_tagged_t a1, a2; +} cmp_arm_insn_t; + +typedef struct set_arm_insn +{ + arm_arg_tagged_t dest; + arm_cmp_flags_t flag; +} set_arm_insn_t; + +typedef struct mov_arm_insn +{ + arm_arg_tagged_t dest, src; + bool is_float; +} mov_arm_insn_t; + +typedef struct movk_arm_insn +{ + arm_arg_tagged_t dest; + uint16_t val; + uint16_t lsl; +} movk_arm_insn_t; + +typedef struct movc_arm_insn +{ + arm_arg_tagged_t dest; + int64_t val; +} movc_arm_insn_t; + +typedef struct cbz_arm_insn +{ + arm_arg_tagged_t cond; + char dest[100]; +} cbz_arm_insn_t; + +typedef struct call_arm_insn +{ + char name[126]; +} call_arm_insn_t; + +typedef struct abs_call_arm_insn +{ + uint16_t num_args; + uint16_t ret_tp; + uint16_t dest; + char name[100]; +} abs_call_arm_insn_t; + +typedef struct abs_call_arm_ext +{ + /* n & 0xffff is the temp. 
n >> 16 is the type */ + uint32_t typed_temps[32]; +} abs_call_arm_ext_t; + +typedef struct str_arm_insn +{ + arm_arg_tagged_t value, address; + uint16_t offset; +} str_arm_insn_t; + +typedef struct ldr_arm_insn +{ + arm_arg_tagged_t dest, address; + uint16_t offset; +} ldr_arm_insn_t; + +typedef enum arm_insn_type + { + ANORM, ACMP, ASET, AMOV, AMOVK, AOTHER, ACBZ, ACALL, AABSCALL, + AABSEXT, ASTR, ALDR, AMOVC, + } arm_insn_type_t; + +typedef union arm_insn +{ + norm_arm_insn_t norm; + cmp_arm_insn_t cmp; + set_arm_insn_t set; + mov_arm_insn_t mov; + movk_arm_insn_t movk; + movc_arm_insn_t movc; + cbz_arm_insn_t cbz; + call_arm_insn_t call; + abs_call_arm_insn_t abs_call; + abs_call_arm_ext_t abs_call_ext; + ldr_arm_insn_t ldr; + str_arm_insn_t str; + char other[128]; +} arm_insn_t; + +typedef struct tagged_arm_insn +{ + arm_insn_type_t type; + arm_insn_t value; +} tagged_arm_insn_t; + +typedef tagged_arm_insn_t asm_insn_t; + + +#endif diff --git a/fastbril/src/asm/asm.c b/fastbril/src/asm/asm.c new file mode 100644 index 000000000..b79921acb --- /dev/null +++ b/fastbril/src/asm/asm.c @@ -0,0 +1,227 @@ +#include "asm.h" + +#include "../bril-insns/types.h" + +#include + +#define __ARM_ARCH //DEBUGGING ON X86!!!! 
+#ifdef __ARM_ARCH +#include "armv8.h" + +static inline const char *op_to_string(norm_arm_op_t op) +{ + switch (op) + { + case AADD: return "add"; + case AMUL: return "mul"; + case ASUB: return "sub"; + case AAND: return "and"; + case AORR: return "orr"; + case ASDIV: return "sdiv"; + case AFADD: return "fadd"; + case AFMUL: return "fmul"; + case AFSUB: return "fsub"; + case AFDIV: return "fdiv"; + case ALSL: return "lsl"; + } +} + +static inline const char *reg_to_string(arm_reg_t reg) +{ + switch(reg) + { + case SP: return "sp"; case X0: return "x0"; case X1: return "x1"; + case X2: return "x2"; case X3: return "x3"; case X4: return "x4"; + case X5: return "x5"; case X6: return "x6"; case X7: return "x7"; + case X8: return "x8"; case X9: return "x9"; case X10: return "x10"; + case X11: return "x11"; case X12: return "x12"; case X13: return "x13"; + case X14: return "x14"; case X15: return "x14"; case X16: return "x16"; + case X17: return "x17"; case X18: return "x18"; case X19: return "x19"; + case X20: return "x20"; case X21: return "x21"; case X22: return "x22"; + case X23: return "x23"; case X24: return "x24"; case X25: return "x25"; + case X26: return "x26"; case X27: return "x27"; case X28: return "x28"; + case X29: return "x29"; case X30: return "x30"; case XZR: return "xzr"; + case D0: return "d0"; case D1: return "d1"; case D2: return "d2"; + case D3: return "d3"; case D4: return "d4"; case D5: return "d5"; + case D6: return "d6"; case D7: return "d7"; case D8: return "d8"; + case D9: return "d9"; case D10: return "d10"; case D11: return "d11"; + case D12: return "d12"; case D13: return "d13"; case D14: return "d14"; + case D15: return "d15"; case D16: return "d16"; case D17: return "d17"; + case D18: return "d18"; case D19: return "d19"; case D20: return "d20"; + case D21: return "d21"; case D22: return "d22"; case D23: return "d23"; + case D24: return "d24"; case D25: return "d25"; case D26: return "d26"; + case D27: return "d27"; case D28: return 
"d28"; case D29: return "d29"; + case D30: return "d30"; case D31: return "d31"; + } +} + +static inline void emit_arg(FILE *stream, arm_arg_tagged_t arg) +{ + switch(arg.type) + { + case REG: fprintf(stream, "%s", reg_to_string(arg.value.reg)); + break; + case TMP: fprintf(stream, "t%d", arg.value.tmp); + break; + case CNST: fprintf(stream, "%d", arg.value.cnst); + } +} + +static inline const char *cmp_code_string(arm_cmp_flags_t flag) +{ + switch(flag) + { + case CMPEQ: return "eq"; + case CMPMI: return "mi"; + case CMPLS: return "ls"; + case CMPGT: return "gt"; + case CMPGE: return "ge"; + case CMPLT: return "lt"; + case CMPLE: return "le"; + } +} + +void emit_arm_insns(FILE *stream, tagged_arm_insn_t *insns, size_t num_insns) +{ + for(size_t i = 0; i < num_insns; ++i) + { + switch(insns[i].type) + { + case ANORM: + fprintf(stream, "\t%s\t", op_to_string(insns[i].value.norm.op)); + emit_arg(stream, insns[i].value.norm.dest); + fprintf(stream, ", "); + emit_arg(stream, insns[i].value.norm.a1); + fprintf(stream, ", "); + emit_arg(stream, insns[i].value.norm.a2); + if(insns[i].value.norm.lsl) + fprintf(stream, ", lsl #%d", insns[i].value.norm.lsl); + putc('\n', stream); + break; + case ACMP: + if(insns[i].value.cmp.is_float) + fprintf(stream, "\tfcmpe\t"); + else + fprintf(stream, "\tcmp\t"); + emit_arg(stream, insns[i].value.cmp.a1); + fprintf(stream, ", "); + emit_arg(stream, insns[i].value.cmp.a2); + putc('\n', stream); + break; + case ASET: + fprintf(stream, "\tcset\t"); + emit_arg(stream, insns[i].value.set.dest); + fprintf(stream, ", %s\n", cmp_code_string(insns[i].value.set.flag)); + break; + case AMOV: + fprintf(stream, "\tmov\t"); + emit_arg(stream, insns[i].value.mov.dest); + fprintf(stream, ", "); + emit_arg(stream, insns[i].value.mov.src); + putc('\n', stream); + break; + case AMOVK: + fprintf(stream, "\tmovk\t"); + emit_arg(stream, insns[i].value.movk.dest); + fprintf(stream, ", %d, lsl %d\n", insns[i].value.movk.val, + insns[i].value.movk.lsl); + 
break; + case AMOVC: + fprintf(stream, "\tmovc\t"); + emit_arg(stream, insns[i].value.movc.dest); + fprintf(stream, ", %ld\n", insns[i].value.movc.val); + break; + case ACBZ: + fprintf(stream, "\tcbz\t"); + emit_arg(stream, insns[i].value.cbz.cond); + fprintf(stream, ", %s\n", insns[i].value.cbz.dest); + break; + case ACALL: + fprintf(stream, "\tbl\t%s\n", insns[i].value.call.name); + break; + case ALDR: + fprintf(stream, "\tldr\t"); + emit_arg(stream, insns[i].value.ldr.dest); + fprintf(stream, ", ["); + emit_arg(stream, insns[i].value.ldr.address); + if(insns[i].value.ldr.offset) + fprintf(stream, ", %d", insns[i].value.ldr.offset); + fprintf(stream, "]\n"); + break; + case ASTR: + fprintf(stream, "\tstr\t"); + emit_arg(stream, insns[i].value.str.value); + fprintf(stream, ", ["); + emit_arg(stream, insns[i].value.str.address); + if(insns[i].value.str.offset) + fprintf(stream, ", %d", insns[i].value.str.offset); + fprintf(stream, "]\n"); + break; + case AABSCALL: + { + size_t num_args = insns[i].value.abs_call.num_args; + fprintf(stream, "\t"); + if(insns[i].value.abs_call.ret_tp != BRILVOID) + fprintf(stream, "t%d <- ", insns[i].value.abs_call.dest); + fprintf(stream, "call\t%s\t", insns[i].value.abs_call.name); + for(size_t x = 0; x < num_args; x += 32) + { + for(size_t argi = 0; x + argi < num_args && argi < 32; ++argi) + { + fprintf(stream, "t%d ", + insns[i + 1 + x/32].value.abs_call_ext.typed_temps[argi] & + 0xffff); + } + ++i; + } + fprintf(stream, "\n"); + } + break; + case AABSEXT: + fprintf(stderr, "shouldn't be printing extension\n"); + break; + case AOTHER: + fprintf(stream, "%s\n", insns[i].value.other); + break; + } + } +} + +void emit_function(FILE *stream, asm_func_t *f) +{ + fprintf(stream, "\t.global %s\n", f->name); + fprintf(stream, "\t.type\t%s, %%function\n", f->name); + fprintf(stream, "%s:\n", f->name); + emit_arm_insns(stream, f->insns, f->num_insns); +} + +void emit_insns(FILE *stream, asm_prog_t *prog) +{ + fprintf(stream, "\t.arch 
armv8-a\n\t.text\n"); + for(size_t i = 0; i < prog->num_funcs; ++i) + emit_function(stream, prog->funcs + i); +} + +#else + +void emit_insns(FILE *stream, asm_prog_t *program) +{ + fprintf(stderr, "architecture not supported!\n"); + exit(1); +} + +#endif + + +void free_func(asm_func_t *f) +{ + free(f->insns); + free(f->arg_types); +} + +void free_asm_prog(asm_prog_t prog) +{ + for(size_t i = 0; i < prog.num_funcs; ++i) + free_func(prog.funcs + i); + free(prog.funcs); +} diff --git a/fastbril/src/asm/asm.h b/fastbril/src/asm/asm.h new file mode 100644 index 000000000..411ba9576 --- /dev/null +++ b/fastbril/src/asm/asm.h @@ -0,0 +1,35 @@ +#ifndef ASM_H +#define ASM_H + +#define __ARM_ARCH //x86 debugging + +#ifdef __ARM_ARCH +#include "armv8.h" +#endif + +typedef struct asm_func +{ + char name[128]; + size_t num_insns; + size_t num_temps; + size_t num_args; + uint16_t *arg_types; + uint16_t ret_tp; + asm_insn_t *insns; +} asm_func_t; + +typedef struct asm_prog +{ + size_t num_funcs; + asm_func_t *funcs; +} asm_prog_t; + + +void emit_insns(FILE *stream, asm_prog_t *program); + +void free_asm_prog(asm_prog_t prog); + +#define INSN_SIZE sizeof(asm_insn_t) +#endif + + diff --git a/fastbril/src/asm/linear-scan.c b/fastbril/src/asm/linear-scan.c new file mode 100644 index 000000000..a14bcd294 --- /dev/null +++ b/fastbril/src/asm/linear-scan.c @@ -0,0 +1,274 @@ +#include "linear-scan.h" + +#include +#include +#include + +#ifdef __ARM_ARCH + + + +typedef struct interval +{ + int start, end; + arm_reg_t reg; + uint16_t temp, stack_loc; + bool is_reg; +} interval_t; + +typedef struct temps_used +{ + int16_t num; + int tmps[2]; +} temps_used_t; + +static inline int max(int a, int b) +{ + return a > b ? a : b; +} + +static inline temps_used_t norm_temps_used(arm_arg_tagged_t a1, arm_arg_tagged_t a2) +{ + uint16_t num = (a1.type == TMP ? 1 : 0) + + (a2.type == TMP ? 1 : 0); + return (temps_used_t) + {.num = num, + .tmps[0] = num == 0 ? -1 : (num == 1 && a1.type != TMP ? 
+                     a2.value.tmp : a1.value.tmp),
+     .tmps[1] = num < 2 ? -1 : a2.value.tmp};
+}
+
+/* Temps referenced by a single operand: one entry when A is a TMP,
+   none otherwise. */
+static inline temps_used_t one_temps_used(arm_arg_tagged_t a)
+{
+  uint16_t num = a.type == TMP ? 1 : 0;
+  return (temps_used_t)
+    {.num = num,
+     .tmps[0] = num == 0 ? -1 : a.value.tmp,
+     .tmps[1] = -1};
+}
+
+/* Temps read by INSN.  Only ANORM, ACMP, AMOV, ACBZ, ASTR and ALDR report
+   operands here; every other type (including AABSCALL, whose arguments
+   get_intervals reads from the following AABSEXT records) reports none. */
+temps_used_t get_temps_used(tagged_arm_insn_t insn)
+{
+  switch(insn.type)
+    {
+    case ANORM:
+      return norm_temps_used(insn.value.norm.a1, insn.value.norm.a2);
+    case ACMP:
+      return norm_temps_used(insn.value.cmp.a1, insn.value.cmp.a2);
+    case AMOV:
+      return one_temps_used(insn.value.mov.src);
+    case ACBZ:
+      return one_temps_used(insn.value.cbz.cond);
+    case ASTR:
+      return norm_temps_used(insn.value.str.address, insn.value.str.value);
+    case ALDR:
+      return one_temps_used(insn.value.ldr.address);
+    default:
+      return (temps_used_t)
+        {.num = 0,
+         .tmps[0] = -1,
+         .tmps[1] = -1};
+    }
+}
+
+/* Temps written by INSN: its dest operand when that operand is a TMP.
+   Only ANORM, ASET, AMOV, AMOVC and ALDR are considered. */
+temps_used_t get_temps_defd(tagged_arm_insn_t insn)
+{
+  switch(insn.type)
+    {
+    case ANORM:
+      return one_temps_used(insn.value.norm.dest);
+    case ASET:
+      return one_temps_used(insn.value.set.dest);
+    case AMOV:
+      return one_temps_used(insn.value.mov.dest);
+    case AMOVC:
+      return one_temps_used(insn.value.movc.dest);
+    case ALDR:
+      return one_temps_used(insn.value.ldr.dest);
+    default:
+      return (temps_used_t)
+        {.num = 0,
+         .tmps[0] = -1,
+         .tmps[1] = -1};
+    }
+}
+
+/* qsort comparator: ascending by interval start. */
+int cmp_interval_start(const void *p1, const void *p2)
+{
+  return ((const interval_t *) p1)->start -
+    ((const interval_t *) p2)->start;
+}
+
+/* qsort comparator: ascending by interval end. */
+int cmp_interval_end(const void *p1, const void *p2)
+{
+  return ((const interval_t *) p1)->end -
+    ((const interval_t *) p2)->end;
+}
+
+/* Widen the interval of temp TMP so that it covers instruction POS.
+   Indices outside [0, num_temps) are ignored instead of written through. */
+static inline void extend_interval(interval_t *intervals, size_t num_temps,
+                                   size_t tmp, int pos)
+{
+  if(tmp >= num_temps)
+    return;
+  if(intervals[tmp].start == -1)
+    intervals[tmp].start = pos;
+  if(intervals[tmp].end < pos)
+    intervals[tmp].end = pos;
+}
+
+/**
+ * Compute one live interval per temp of F.
+ *
+ * Returns a malloc'd array of f.num_temps entries sorted by ascending
+ * start; the caller frees it.  A temp that never appears keeps
+ * start == end == -1.  Prints "<name>:" to stdout as a side effect.
+ * Exits with status 1 if the array cannot be allocated.
+ */
+interval_t *get_intervals(asm_func_t f)
+{
+  interval_t *intervals = malloc(sizeof(interval_t) * f.num_temps);
+  if(!intervals && f.num_temps != 0)
+    {
+      fprintf(stderr, "out of memory\n");
+      exit(1);
+    }
+  printf("%s:\n", f.name);
+  for(uint16_t i = 0; i < f.num_temps; ++i)
+    {
+      intervals[i].temp = i;
+      intervals[i].start = -1;
+      intervals[i].end = -1;
+    }
+  /* arguments occupy the first num_args temps and are live on entry */
+  for(size_t i = 0; i < f.num_args; ++i)
+    extend_interval(intervals, f.num_temps, i, 0);
+  for(int i = 0; i < f.num_insns; ++i)
+    {
+      if(f.insns[i].type == AABSCALL)
+        {
+          /* Remember where the call sits: i is advanced past the AABSEXT
+             records below, but every argument is live at the call itself. */
+          const int call_idx = i;
+          uint16_t num_args = f.insns[call_idx].value.abs_call.num_args;
+          /* one AABSEXT record follows per 32 arguments; x is the index of
+             the first argument held by the current record */
+          for(size_t x = 0; x < num_args; x += 32)
+            {
+              size_t ext_idx = call_idx + 1 + x / 32;
+              for(size_t argi = 0; x + argi < num_args && argi < 32; ++argi)
+                /* low 16 bits hold the temp, high 16 bits its type */
+                extend_interval(intervals, f.num_temps,
+                                f.insns[ext_idx].value
+                                .abs_call_ext.typed_temps[argi] & 0xffff,
+                                call_idx);
+              ++i; /* skip the AABSEXT record just consumed */
+            }
+          /* NOTE(review): dest is recorded even when the callee returns
+             nothing -- confirm it is a valid temp in that case. */
+          extend_interval(intervals, f.num_temps,
+                          f.insns[call_idx].value.abs_call.dest, call_idx);
+        } else
+        {
+          temps_used_t t = get_temps_used(f.insns[i]);
+          for(size_t j = 0; j < t.num; ++j)
+            extend_interval(intervals, f.num_temps, t.tmps[j], i);
+          /* a definition also keeps the temp live at this instruction, so
+             a temp that is written but never read gets end == start */
+          temps_used_t defd = get_temps_defd(f.insns[i]);
+          for(size_t j = 0; j < defd.num; ++j)
+            extend_interval(intervals, f.num_temps, defd.tmps[j], i);
+        }
+    }
+  qsort(intervals, f.num_temps, sizeof(interval_t), cmp_interval_start);
+  return intervals;
+}
+
+
+/* Stack of free registers.  `top` is the number of registers currently
+   held (lin_alloc initialises it to the number of initialisers), so the
+   next free slot is regs[top]. */
+typedef struct reg_pool
+{
+  size_t top;
+  arm_reg_t regs[32];
+} reg_pool_t;
+
+
+/* True for the D0..D31 registers, false for every other register. */
+bool is_float_reg(arm_reg_t r)
+{
+  switch (r)
+    {
+    case D0: case D1: case D2: case D3:
+    case D4: case D5: case D6: case D7:
+    case D8: case D9: case D10: case D11:
+    case D12: case D13: case D14: case D15:
+    case D16: case D17: case D18: case D19:
+    case D20: case D21: case D22: case D23:
+    case D24: case D25: case D26: case D27:
+    case D28: case D29: case D30: case D31:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Push register R back onto POOL; dropped if the pool is already full. */
+static inline void release_reg(reg_pool_t *pool, arm_reg_t r)
+{
+  if(pool->top < sizeof(pool->regs) / sizeof(pool->regs[0]))
+    pool->regs[pool->top++] = r;
+}
+
+/**
+ * Retire every active interval that ends before interval I starts.
+ *
+ * ACTIVE (holding *NUM_ACTIVE entries) is sorted by ascending end; the
+ * register of each retired entry goes back to FLOAT_POOL or INT_POOL
+ * according to is_float_reg, and the surviving entries are moved to the
+ * front of ACTIVE with *NUM_ACTIVE updated to their count.
+ */
+void expire_old_intervals(interval_t i, interval_t *active, size_t *num_active,
+                          reg_pool_t *float_pool, reg_pool_t *int_pool)
+{
+  qsort(active, *num_active, sizeof(interval_t), cmp_interval_end);
+  size_t expired = 0;
+  /* sorted by end, so the first entry still live at i.start ends the scan */
+  while(expired < *num_active && active[expired].end < i.start)
+    {
+      arm_reg_t r = active[expired].reg;
+      release_reg(is_float_reg(r) ? float_pool : int_pool, r);
+      ++expired;
+    }
+  if(expired == 0)
+    return;
+  *num_active -= expired;
+  /* pointer arithmetic is already in elements; memmove's size is in bytes */
+  memmove(active, active + expired, *num_active * sizeof(interval_t));
+}
+
+/**
+ * Linear-scan entry point for function WHICH_FUN of P.
+ *
+ * Currently only computes and prints the live intervals; the register
+ * pools and the active list are set up but not yet used, and the function
+ * is returned unchanged.
+ */
+asm_func_t lin_alloc(asm_prog_t p, size_t which_fun)
+{
+  interval_t *intervals = get_intervals(p.funcs[which_fun]);
+  interval_t *active = malloc(sizeof(interval_t) * 128);
+  reg_pool_t float_pool = (reg_pool_t)
+    {.top = 30,
+     .regs = {D31, D30, D29, D28, D27, D26, D25, D24, D23, D22, D21, D20, D19, D18,
+              D17, D16, D15, D14, D13, D12, D11, D10, D9, D8, D7, D6, D5, D4, D3, D2,}};
+  reg_pool_t int_pool = (reg_pool_t)
+    {.top = 29,
+     .regs = {X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18,
+              X17, X16, X15, X14, X13, X12, X11, X10, X9, X8, X7, X6, X5, X4, X3, X2,}};
+
+  size_t num_active = 0;
+  for(size_t i = 0; i < p.funcs[which_fun].num_temps; ++i)
+    {
+      printf("t%d: live from %d to %d\n", intervals[i].temp, intervals[i].start, intervals[i].end);
+    }
+  free(active);
+  free(intervals);
+  return p.funcs[which_fun];
+}
+
+/* Run lin_alloc on every function of P and return the resulting program.
+   The funcs array of the result is malloc'd here. */
+asm_prog_t linear_scan(asm_prog_t p)
+{
+  asm_func_t *funs = malloc(sizeof(asm_func_t) * p.num_funcs);
+  if(!funs && p.num_funcs != 0)
+    {
+      fprintf(stderr, "out of memory\n");
+      exit(1);
+    }
+  for(size_t i = 0; i < p.num_funcs; ++i)
+    funs[i] = lin_alloc(p, i);
+  return (asm_prog_t)
+    {.funcs = funs,
+     .num_funcs = p.num_funcs};
+}
+
+#else
+
+/* Fallback for non-ARM builds: report the missing support and exit. */
+asm_prog_t linear_scan(asm_prog_t p)
+{
+  fprintf(stderr, "arch not supported\n");
+  exit(1);
+}
+
+
+#endif
diff --git a/fastbril/src/asm/linear-scan.h b/fastbril/src/asm/linear-scan.h
new file mode 100644
index 000000000..c717dfbf9
--- /dev/null
+++ b/fastbril/src/asm/linear-scan.h
@@ -0,0 +1,8 @@
+#ifndef LINEAR_SCAN_H
+#define LINEAR_SCAN_H
+#include "asm.h"
+
+
+asm_prog_t linear_scan(asm_prog_t prog);
+
+#endif
diff --git a/fastbril/src/asm/to_abstract_asm.c 
b/fastbril/src/asm/to_abstract_asm.c new file mode 100644 index 000000000..b458a0d07 --- /dev/null +++ b/fastbril/src/asm/to_abstract_asm.c @@ -0,0 +1,463 @@ +#include "to_abstract_asm.h" + +#define _POSIX_C_SOURCE 200809L +#include + +#include + +#ifdef __ARM_ARCH + + +static inline void make_space(tagged_arm_insn_t **insns, size_t *space, size_t idesired) +{ + while(idesired >= *space) + { + *space *= 2; + *insns = realloc(*insns, *space * sizeof(asm_insn_t)); + } +} + +static inline arm_arg_tagged_t from_tmp(uint16_t tmp) +{ + return (arm_arg_tagged_t) {.type = TMP, .value = (arm_arg_t) {.tmp = tmp}}; +} + +static inline arm_arg_tagged_t from_const(int16_t num) +{ + return (arm_arg_tagged_t) {.type = CNST, .value = (arm_arg_t) {.cnst = num}}; +} + +static inline void write_insn(asm_insn_t *insn, FILE *insn_stream) +{ + fwrite(insn, INSN_SIZE, 1, insn_stream); +} + +static inline void trans_const(FILE *insn_stream, + uint16_t dest, int64_t value) +{ + write_insn(&(tagged_arm_insn_t) {.type = AMOV, + .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_tmp(dest), + .src = from_const(value & 0xffff)}}}, + insn_stream); + int64_t tmp = value >> 16; + for(size_t i = 1; i < 4; ++i) + { + if(tmp & 0xffff) + write_insn(&(tagged_arm_insn_t) {.type = AMOVK, + .value = (arm_insn_t) + {.movk = (movk_arm_insn_t) + {.dest = from_tmp(dest), + .val = tmp & 0xffff, + .lsl = i * 16}}}, insn_stream); + tmp >>= 16; + } +} + +norm_arm_op_t bril_op_to_arm(uint16_t op) +{ + switch(op) + { + case ADD: return AADD; + case MUL: return AMUL; + case SUB: return ASUB; + case AND: return AAND; + case OR: return AORR; + case DIV: return ASDIV; + case FADD: return AFADD; + case FMUL: return AFMUL; + case FSUB: return AFSUB; + case FDIV: return AFDIV; + } + return -1; +} + +arm_cmp_flags_t bril_op_to_flag(uint16_t op) +{ + switch(op) + { + case EQ: return CMPEQ; + case LT: return CMPLT; + case LE: return CMPLE; + case FEQ: return CMPEQ; + case FLT: return CMPMI; + case FLE: return 
CMPLS; + case FGT: + case GT: return CMPGT; + case GE: + case FGE: return CMPGE; + } + return -1; +} + + +asm_func_t trans_func(program_t *prog, size_t which_fun) +{ + char *mem_stream; + size_t size_loc; + //size_t insns_space = 32, ni = 0; + //tagged_arm_insn_t *insns = malloc(sizeof(asm_insn_t) * insns_space); + FILE *insn_stream = open_memstream(&mem_stream, &size_loc); + function_t f = prog->funcs[which_fun]; + bool float_var = false; + for(size_t i = 0; i < f.num_insns; ++i) + { + instruction_t *insn = f.insns + i; + if(is_labelled(*insn)) + { + //make_space(&insns, &insns_space, ni); + arm_insn_t lbl; + sprintf(lbl.other, ".LF%s%lx:", f.name, i); + fwrite(&(tagged_arm_insn_t) {.type = AOTHER, + .value = lbl}, INSN_SIZE, 1, insn_stream); + /* insns[ni++] = (tagged_arm_insn_t) {.type = AOTHER, */ + /* .value = lbl}; */ + } + uint16_t opcode = get_opcode(*insn); + //make_space(&insns, &insns_space, ni); + switch(opcode) + { + case NOP: + fwrite(&(tagged_arm_insn_t) + {.type = AOTHER, + .value = (arm_insn_t){ .other = "\tnop"}}, + INSN_SIZE, 1, insn_stream); + break; + case CONST: + write_insn(&(tagged_arm_insn_t) + {.type = AMOVC, .value = (arm_insn_t) + {.movc = (movc_arm_insn_t) + {.dest = from_tmp(insn->const_insn.dest), + .val = insn->const_insn.value}}}, insn_stream); + break; + case LCONST: + write_insn(&(tagged_arm_insn_t) + {.type = AMOVC, .value = (arm_insn_t) + {.movc = (movc_arm_insn_t) + {.dest = from_tmp(insn->long_const_insn.dest), + .val = (insn + 1)->const_ext.int_val}}}, insn_stream); + ++i; + break; + case ADD: + case MUL: + case SUB: + case AND: + case OR: + case DIV: + case FADD: + case FMUL: + case FSUB: + case FDIV: + write_insn(&(tagged_arm_insn_t) + {.type = ANORM, + .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = bril_op_to_arm(opcode), + .dest = from_tmp(insn->norm_insn.dest), + .a1 = from_tmp(insn->norm_insn.arg1), + .a2 = from_tmp(insn->norm_insn.arg2)}} + }, insn_stream); + break; + case NOT: + 
write_insn(&(tagged_arm_insn_t) + {.type = ACMP, + .value = (arm_insn_t) + {.cmp = (cmp_arm_insn_t) + {.is_float = false, + .a1 = from_tmp(insn->norm_insn.arg1), + .a2 = from_const(0) + }}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ASET, .value = (arm_insn_t) + {.set = (set_arm_insn_t) + {.dest = from_tmp(insn->norm_insn.dest), + .flag = CMPEQ}}}, insn_stream); + break; + case FEQ: + case FLT: + case FGT: + case FGE: + float_var = true; + goto cmp_lbl; + case EQ: + case LT: + case GT: + case LE: + case GE: + float_var = false; + cmp_lbl: + write_insn(&(tagged_arm_insn_t) + {.type = ACMP, + .value = (arm_insn_t) + {.cmp = (cmp_arm_insn_t) + {.is_float = float_var, + .a1 = from_tmp(insn->norm_insn.arg1), + .a2 = from_tmp(insn->norm_insn.arg2)}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ASET, .value = (arm_insn_t) + {.set = (set_arm_insn_t) + {.dest = from_tmp(insn->norm_insn.dest), + .flag = bril_op_to_flag(opcode)}}}, insn_stream); + break; + case PRINT: + { + uint16_t num_args = insn->print_insn.num_prints; + i += num_args / 2; + uint16_t *args = &insn->print_insn.type1; + for(uint16_t j = 0; j < num_args * 2; j += 2) + { + if(j != 0) + { + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_const(32)}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "putchar"}}}, insn_stream); + } + switch(args[j]) + { + case BRILINT: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_tmp(args[j + 1])}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "priint"}}}, insn_stream); + break; + case 
BRILBOOL: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_tmp(args[j + 1])}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "pribool"}}}, insn_stream); + break; + case BRILFLOAT: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = D0}}, + .src = from_tmp(args[j + 1])}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "prifloat"}}}, insn_stream); + } + } + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_const(10)}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "putchar"}}}, insn_stream); + } + break; + case CALL: + { + uint16_t num_args = insn->call_inst.num_args; + uint16_t *args = (uint16_t*) (insn + 1); + function_t *target = prog->funcs + insn->call_inst.target; + tagged_arm_insn_t call = (tagged_arm_insn_t) + {.type = AABSCALL, .value = (arm_insn_t) + {.abs_call = (abs_call_arm_insn_t) + {.num_args = num_args, + .ret_tp = target->ret_tp, + .dest = insn->call_inst.dest}}}; + sprintf(call.value.abs_call.name, "%s", target->name); + write_insn(&call, insn_stream); + for(size_t eidx = 0; eidx < num_args; eidx += 32) + { + abs_call_arm_ext_t ext; + for(size_t argi = 0; eidx + argi < num_args && argi < 32; ++argi) + { + size_t arg = eidx + argi; + uint32_t tagged_arg = target->arg_types[arg]; + tagged_arg <<= 16; + tagged_arg |= args[arg]; + ext.typed_temps[argi] = tagged_arg; + } + 
write_insn(&(tagged_arm_insn_t) + {.type = AABSEXT, .value = (arm_insn_t) + {.abs_call_ext = ext}}, insn_stream); + } + i += (num_args + 3) / 4; + } + break; + case RET: + switch(f.ret_tp) + { + case BRILVOID: + break; + case BRILFLOAT: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = D0}}, + .src = from_tmp(insn->norm_insn.arg1), + .is_float = true}}}, insn_stream); + break; + default: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_tmp(insn->norm_insn.arg1), + .is_float = false}}}, insn_stream); + } + arm_insn_t ins; + sprintf(ins.other, "\tb\t.L%s.ret", f.name); + write_insn(&(tagged_arm_insn_t) + {.type = AOTHER, .value = ins}, insn_stream); + break; + case JMP: + { + arm_insn_t ins; + sprintf(ins.other, "\tb\t.LF%s%x", f.name, insn->norm_insn.dest); + write_insn(&(tagged_arm_insn_t) + {.type = AOTHER, .value = ins}, insn_stream); + } + break; + case BR: + { + tagged_arm_insn_t ins = (tagged_arm_insn_t) + {.type = ACBZ, .value = (arm_insn_t) + {.cbz = (cbz_arm_insn_t) + {.cond = from_tmp(insn->br_inst.test)}}}; + sprintf(ins.value.cbz.dest, ".LF%s%x", f.name, insn->br_inst.lfalse); + write_insn(&ins, insn_stream); + ins = (tagged_arm_insn_t) {.type = AOTHER}; + sprintf(ins.value.other, "\tb\t.LF%s%x", f.name, insn->br_inst.ltrue); + write_insn(&ins, insn_stream); + } + break; + case ID: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_tmp(insn->norm_insn.dest), + .src = from_tmp(insn->norm_insn.arg1), + .is_float = insn->norm_insn.arg2 == BRILFLOAT}}}, insn_stream); + break; + case ALLOC: + write_insn(&(tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = ALSL, + .dest = 
(arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .a1 = from_tmp(insn->norm_insn.arg1), + .a2 = from_const(3)}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "malloc"}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_tmp(insn->norm_insn.dest), + .src = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .is_float = false}}}, insn_stream); + break; + case FREE: + write_insn(&(tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = (arm_arg_tagged_t) + {.type = REG, .value = (arm_arg_t) + {.reg = X0}}, + .src = from_tmp(insn->norm_insn.arg1)}}}, insn_stream); + write_insn(&(tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "free"}}}, insn_stream); + break; + case STORE: + write_insn(&(tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.address = from_tmp(insn->norm_insn.arg1), + .value = from_tmp(insn->norm_insn.arg2)}}}, insn_stream); + break; + case LOAD: + write_insn(&(tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.address = from_tmp(insn->norm_insn.arg1), + .dest = from_tmp(insn->norm_insn.dest)}}}, insn_stream); + break; + case PTRADD: + write_insn(&(tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = AADD, + .dest = from_tmp(insn->norm_insn.dest), + .a1 = from_tmp(insn->norm_insn.arg1), + .a2 = from_tmp(insn->norm_insn.arg2), + .lsl = 3}}}, insn_stream); + break; + default: + fprintf(stderr, "unsupported opcode: %d\n", opcode); + } + } + tagged_arm_insn_t i = (tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) {}}; + sprintf(i.value.other, ".LF%s%lx:\n", f.name, f.num_insns); + write_insn(&i, insn_stream); + fclose(insn_stream); + 
asm_func_t fun = (asm_func_t) + {.num_insns = size_loc / sizeof(tagged_arm_insn_t), + .num_temps = f.num_tmps, + .num_args = f.num_args, + .ret_tp = f.ret_tp, + .insns = (asm_insn_t*) mem_stream}; + fun.arg_types = malloc(sizeof(uint16_t) * f.num_args); + memcpy(fun.arg_types, f.arg_types, sizeof(uint16_t) * f.num_args); + sprintf(fun.name, "%s", f.name); + return fun; +} + +asm_prog_t bytecode_to_abs_asm(program_t *prog) +{ + asm_func_t *funcs = malloc(sizeof(asm_func_t) * prog->num_funcs); + for(size_t i = 0; i < prog->num_funcs; ++i) + { + funcs[i] = trans_func(prog, i); + } + asm_prog_t p; + p.funcs = funcs; + p.num_funcs = prog->num_funcs; + return p; +} + +#endif diff --git a/fastbril/src/asm/to_abstract_asm.h b/fastbril/src/asm/to_abstract_asm.h new file mode 100644 index 000000000..86d1b7d79 --- /dev/null +++ b/fastbril/src/asm/to_abstract_asm.h @@ -0,0 +1,16 @@ +#ifndef TO_ABS_H +#define TO_ABS_H + +#include "../bril-insns/instrs.h" +#include "asm.h" + + +//#define __ARM_ARCH +#ifdef __ARM_ARCH +#include "armv8.h" +#endif + +asm_prog_t bytecode_to_abs_asm(program_t *prog); + + +#endif diff --git a/fastbril/src/asm/trivial-regalloc.c b/fastbril/src/asm/trivial-regalloc.c new file mode 100644 index 000000000..17502bc4e --- /dev/null +++ b/fastbril/src/asm/trivial-regalloc.c @@ -0,0 +1,586 @@ +#include "trivial-regalloc.h" + +#include "../bril-insns/types.h" + +#include +#include +#include + +#ifdef __ARM_ARCH + +static inline arm_arg_tagged_t from_tmp(uint16_t tmp) +{ + return (arm_arg_tagged_t) {.type = TMP, .value = (arm_arg_t) {.tmp = tmp}}; +} + +static inline arm_arg_tagged_t from_reg(arm_reg_t r) +{ + return (arm_arg_tagged_t) {.type = REG, .value = (arm_arg_t) {.reg = r}}; +} + +static inline arm_arg_tagged_t from_const(int16_t num) +{ + return (arm_arg_tagged_t) {.type = CNST, .value = (arm_arg_t) {.cnst = num}}; +} + +static inline arm_arg_tagged_t from_uconst(uint16_t num) +{ + return (arm_arg_tagged_t) {.type = CNST, .value = (arm_arg_t) {.cnst = 
num}}; +} + + +static inline int max(int a, int b) +{ + return a > b ? a : b; +} + + +static inline tagged_arm_insn_t mov(arm_reg_t dest, arm_arg_tagged_t src) +{ + switch(src.type) + { + case REG: + return (tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_reg(dest), .src = src}}}; + case CNST: + return (tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_reg(dest), .src = src}}}; + case TMP: + return (tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(dest), + .address = from_reg(SP), + .offset = 16 + src.value.tmp * 8}}}; + } +} + +arm_arg_tagged_t float_arg_reg(int n) +{ + switch (n) + { + case 0: return from_reg(D0); + case 1: return from_reg(D1); + case 2: return from_reg(D2); + case 3: return from_reg(D3); + case 4: return from_reg(D4); + case 5: return from_reg(D5); + case 6: return from_reg(D6); + case 7: return from_reg(D7); + } + fprintf(stderr, "invalid float arg %d\n", n); + exit(1); +} + + +arm_arg_tagged_t norm_arg_reg(int n) +{ + switch (n) + { + case 0: return from_reg(X0); + case 1: return from_reg(X1); + case 2: return from_reg(X2); + case 3: return from_reg(X3); + case 4: return from_reg(X4); + case 5: return from_reg(X5); + case 6: return from_reg(X6); + case 7: return from_reg(X7); + } + fprintf(stderr, "invalid regular arg %d\n", n); + //exit(1); +} + +static inline tagged_arm_insn_t movd(arm_arg_tagged_t dest, arm_reg_t src) +{ + switch(dest.type) + { + case REG: + return (tagged_arm_insn_t) + {.type = AMOV, .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = dest, .src = from_reg(src)}}}; + case TMP: + return (tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(src), + .address = from_reg(SP), + .offset = 16 + dest.value.tmp * 8}}}; + default: + fprintf(stderr, "cannot move into a constant\n"); + exit(1); + } +} + +void 
write_insn(asm_insn_t insn, FILE *insn_stream) +{ + fwrite(&insn, INSN_SIZE, 1, insn_stream); +} + +static inline bool reg_eql(arm_reg_t reg, arm_arg_tagged_t arg) +{ + return arg.type == REG && arg.value.reg == reg; +} + +bool is_float(norm_arm_op_t op) +{ + switch (op) + { + case AFADD: + case AFMUL: + case AFSUB: + case AFDIV: + return true; + default: + return false; + } +} + +void trans_const(FILE *insn_stream, + arm_arg_tagged_t dest, int64_t value) +{ + write_insn((tagged_arm_insn_t) {.type = AMOV, + .value = (arm_insn_t) + {.mov = (mov_arm_insn_t) + {.dest = from_reg(X0), + .src = from_uconst(value & 0xffff)}}}, + insn_stream); + int64_t tmp = value >> 16; + for(size_t i = 1; i < 4; ++i) + { + if(tmp & 0xffff) + write_insn((tagged_arm_insn_t) {.type = AMOVK, + .value = (arm_insn_t) + {.movk = (movk_arm_insn_t) + {.dest = from_reg(X0), + .val = tmp & 0xffff, + .lsl = i * 16}}}, insn_stream); + tmp >>= 16; + } + write_insn(movd(dest, X0), insn_stream); +} + + +size_t trivial_prologue(asm_func_t f, FILE *insn_stream) +{ + size_t stack_offset = 8 * f.num_temps + 8; + bool is_main = strcmp(f.name, "main") == 0; + if(is_main && f.num_args != 0) + stack_offset += 8; + if(stack_offset % 16 != 0) + stack_offset += 8; + if(stack_offset < 512) + { + tagged_arm_insn_t i = (tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) {}}; + sprintf(i.value.other, "\tstp\tx29, x30, [sp, -%ld]!", stack_offset); + write_insn(i, insn_stream); + } else + { + write_insn((tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = ASUB, .dest = from_reg(SP), + .a1 = from_reg(SP), .a2 = from_const(stack_offset)}}}, insn_stream); + tagged_arm_insn_t i = (tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) {}}; + sprintf(i.value.other, "\tstp\tx29, x30, [sp]"); + write_insn(i, insn_stream); + } + write_insn(mov(X29, from_reg(SP)), insn_stream); + if(is_main && f.num_args != 0) + { + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value 
= (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(X19), + .address = from_reg(SP), + .offset = stack_offset - 16}}}, insn_stream); + write_insn(mov(X19, from_reg(X1)), insn_stream); + for(size_t i = 0; i < f.num_args; ++i) + { + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(X0), + .address = from_reg(X19), + .offset = (i + 1) * 8}}}, insn_stream); + switch(f.arg_types[i]) + { + case BRILINT: + write_insn((tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "int_of_string"}}}, insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(X0), + .address = from_reg(SP), + .offset = 16 + i * 8}}}, insn_stream); + break; + case BRILBOOL: + write_insn((tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "bool_of_string"}}}, insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(X0), + .address = from_reg(SP), + .offset = 16 + i * 8}}}, insn_stream); + break; + case BRILFLOAT: + write_insn((tagged_arm_insn_t) + {.type = ACALL, .value = (arm_insn_t) + {.call = (call_arm_insn_t) + {.name = "float_of_string"}}}, insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(D0), + .address = from_reg(SP), + .offset = 16 + i * 8}}}, insn_stream); + break; + default: + fprintf(stderr, "main cannot have pointer arguments! 
Exiting.\n"); + exit(1); + } + } + } else + { + size_t double_args = 0, other_args = 0, spilled_args = 0; + /* args passed in memory */ + for(size_t i = 0; i < f.num_args; ++i) + { + if(f.arg_types[i] == BRILFLOAT) + { + if(double_args < 7) + ++double_args; + else + { + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(D9), + .address = from_reg(SP), + .offset = stack_offset + 8 * spilled_args++}}}, + insn_stream); + write_insn(movd(from_tmp(++double_args), D9), insn_stream); + } + } else + { + if(other_args < 7) + ++other_args; + else + { + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(X9), + .address = from_reg(SP), + .offset = stack_offset + 8 * spilled_args++}}}, + insn_stream); + write_insn(movd(from_tmp(++other_args), X9), insn_stream); + } + } + } + double_args = 0; other_args = 0; + for(size_t i = 0; i < f.num_args; ++i) + { + if(f.arg_types[i] == BRILFLOAT) + { + if(double_args < 8) + write_insn(movd(from_tmp(i), float_arg_reg(double_args++).value.reg), + insn_stream); + } else + { + if(other_args < 8) + write_insn(movd(from_tmp(i), norm_arg_reg(other_args++).value.reg), + insn_stream); + } + } + } + return stack_offset; +} + + +void trivial_epilogue(asm_func_t f, FILE *insn_stream, size_t stack_offset) +{ + tagged_arm_insn_t i = (tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) {}}; + sprintf(i.value.other, ".L%s.ret:", f.name); + write_insn(i, insn_stream); + bool is_main = strcmp(f.name, "main") == 0; + if(is_main && f.num_args != 0) + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(X19), + .address = from_reg(SP), + .offset = stack_offset - 16}}}, insn_stream); + if(is_main && f.ret_tp == BRILVOID) + write_insn((tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) + {.other = "\tmov\tw0, 0"}}, insn_stream); + if(stack_offset < 512) 
+ { + tagged_arm_insn_t i = (tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) {}}; + sprintf(i.value.other, "\tldp\tx29, x30, [sp], %ld\n", stack_offset); + write_insn(i, insn_stream); + } else + { + write_insn((tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) + {.other = "\tldp\tx29, x30, [sp]"}}, insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = AADD, .dest = from_reg(SP), + .a1 = from_reg(SP), .a2 = from_const(stack_offset)}}}, insn_stream); + } + write_insn((tagged_arm_insn_t) + {.type = AOTHER, .value = (arm_insn_t) + {.other = "ret"}}, insn_stream); +} + +asm_func_t allocate(asm_prog_t *p, size_t which_fun) +{ + char *mem_stream; + size_t size_loc; + FILE *insn_stream = open_memstream(&mem_stream, &size_loc); + asm_func_t f = p->funcs[which_fun]; + size_t stack_offset = trivial_prologue(f, insn_stream); + for(size_t i = 0; i < f.num_insns; ++i) + { + tagged_arm_insn_t insn = f.insns[i]; + switch(insn.type) + { + case ANORM: + { + arm_reg_t r1 = is_float(insn.value.norm.op) ? D0 : X0; + arm_reg_t r2 = is_float(insn.value.norm.op) ? D1 : X1; + bool is_weird = reg_eql(r1, insn.value.norm.a2); + if(is_weird) + write_insn(mov(r2, insn.value.norm.a2), insn_stream); + write_insn(mov(r1, insn.value.norm.a1), insn_stream); + if(!is_weird) + write_insn(mov(r2, insn.value.norm.a2), insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = insn.value.norm.op, + .a1 = from_reg(r1), + .a2 = from_reg(r2), + .dest = from_reg(r1), + .lsl = insn.value.norm.lsl}}}, insn_stream); + write_insn(movd(insn.value.norm.dest, r1), insn_stream); + } break; + case ACMP: + { + bool is_float = insn.value.cmp.is_float; + arm_reg_t r1 = is_float ? D0 : X0; + arm_reg_t r2 = is_float ? 
D1 : X1; + bool is_weird = reg_eql(r1, insn.value.cmp.a2); + if(is_weird) + write_insn(mov(r2, insn.value.cmp.a2), insn_stream); + write_insn(mov(r1, insn.value.cmp.a1), insn_stream); + if(!is_weird) + write_insn(mov(r2, insn.value.cmp.a2), insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ACMP, .value = (arm_insn_t) + {.cmp = (cmp_arm_insn_t) + {.is_float = is_float, + .a1 = from_reg(r1), + .a2 = from_reg(r2)}}}, insn_stream); + } break; + case ASET: + write_insn((tagged_arm_insn_t) + {.type = ASET, .value = (arm_insn_t) + {.set = (set_arm_insn_t) + {.dest = from_reg(X0), + .flag = insn.value.set.flag}}}, insn_stream); + write_insn(movd(insn.value.set.dest, X0), insn_stream); + break; + case AMOV: + if(insn.value.mov.dest.type == REG) + { + write_insn(mov(insn.value.mov.dest.value.reg, + insn.value.mov.src), insn_stream); + } else + { + arm_reg_t reg = insn.value.mov.is_float ? D0 : X0; + write_insn(mov(reg, insn.value.mov.src), insn_stream); + write_insn(movd(insn.value.mov.dest, reg), insn_stream); + } + break; + case AMOVC: + trans_const(insn_stream, insn.value.movc.dest, insn.value.movc.val); + break; + case ACBZ: + write_insn(mov(X0, insn.value.cbz.cond), insn_stream); + tagged_arm_insn_t in = (tagged_arm_insn_t) + {.type = ACBZ, .value = (arm_insn_t) + {.cbz = (cbz_arm_insn_t) + {.cond = from_reg(X0)}}}; + sprintf(in.value.cbz.dest, "%s", insn.value.cbz.dest); + write_insn(in, insn_stream); + break; + case AABSCALL: + { + uint16_t num_args = insn.value.abs_call.num_args; + uint32_t *typed_args = alloca(sizeof(uint32_t) * num_args); + for(size_t x = 0; x < num_args; x += 32) + { + for(size_t argi = 0; x * 32 + argi < num_args && argi < 32; ++argi) + typed_args[x * 32 + argi] = f.insns[i + 1 + x/32] + .value.abs_call_ext.typed_temps[argi]; + ++i; + } + uint16_t norm_args = 0, float_args = 0; + for(uint16_t i = 0; i < num_args; ++i) + { + if(typed_args[i] >> 16 == BRILFLOAT) + ++float_args; + else + ++norm_args; + } + int norm_stack_needed = 
max((norm_args - 9) * 8, 0); + int float_stack_needed = max((float_args - 8) * 8, 0); + int stack_needed = norm_stack_needed + float_stack_needed; + if(stack_needed % 16 != 0) stack_needed += 8; + if(stack_needed) + write_insn((tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = ASUB, .dest = from_reg(SP), + .a1 = from_reg(SP), .a2 = from_const(stack_needed)}}}, + insn_stream); + int norm_args_left = norm_args; + int float_args_left = float_args; + int stack_args_left = max(norm_args - 8, 0) + max(float_args - 8, 0) - 1; + for(int argidx = num_args - 1; argidx >= 0; --argidx) + { + int *left; + arm_arg_tagged_t (*get_arg)(int); + if(typed_args[argidx] >> 16 == BRILFLOAT) + { + left = &float_args_left; + get_arg = &float_arg_reg; + } else + { + left = &norm_args_left; + get_arg = &norm_arg_reg; + } + if(*left > 8) + { + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(X0), + .address = from_reg(SP), + .offset = 16 + 8 * (typed_args[argidx] & 0xffff) + + stack_needed}}}, insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(X0), + .address = from_reg(SP), + .offset = stack_args_left * 8}}}, insn_stream); + } else + { + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = get_arg(*left - 1), + .address = from_reg(SP), + .offset = 16 + 8 * (typed_args[argidx] & 0xffff) + + stack_needed}}}, insn_stream); + } + --(*left); + --stack_args_left; + } + tagged_arm_insn_t call_insn = (tagged_arm_insn_t) + {.type = ACALL}; + sprintf(call_insn.value.call.name, "%s", insn.value.abs_call.name); + write_insn(call_insn, insn_stream); + if(stack_needed) + write_insn((tagged_arm_insn_t) + {.type = ANORM, .value = (arm_insn_t) + {.norm = (norm_arm_insn_t) + {.op = AADD, .dest = from_reg(SP), + .a1 = from_reg(SP), .a2 = 
from_const(stack_needed)}}}, + insn_stream); + if(insn.value.abs_call.ret_tp == BRILFLOAT) + write_insn(movd(from_tmp(insn.value.abs_call.dest), D0), insn_stream); + else if(insn.value.abs_call.ret_tp != BRILVOID) + write_insn(movd(from_tmp(insn.value.abs_call.dest), X0), insn_stream); + } break; + case ALDR: + write_insn(mov(X0, insn.value.ldr.address), insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ALDR, .value = (arm_insn_t) + {.ldr = (ldr_arm_insn_t) + {.dest = from_reg(X0), + .address = from_reg(X0), + .offset = insn.value.ldr.offset}}}, insn_stream); + write_insn(movd(insn.value.ldr.dest, X0), insn_stream); + break; + case ASTR: + write_insn(mov(X0, insn.value.str.value), insn_stream); + write_insn(mov(X1, insn.value.str.address), insn_stream); + write_insn((tagged_arm_insn_t) + {.type = ASTR, .value = (arm_insn_t) + {.str = (str_arm_insn_t) + {.value = from_reg(X0), + .address = from_reg(X1), + .offset = insn.value.str.offset}}}, insn_stream); + break; + case AMOVK: + case AOTHER: + case ACALL: + write_insn(insn, insn_stream); + break; + default: + fprintf(stderr, "bad insn type: %d\n", insn.type); + exit(1); + } + } + trivial_epilogue(f, insn_stream, stack_offset); + fclose(insn_stream); + asm_func_t ret; + sprintf(ret.name, "%s", f.name); + ret.insns = (asm_insn_t*) mem_stream; + ret.num_insns = size_loc / sizeof(tagged_arm_insn_t); + /* other metadata no longer needed since not abstract*/ + return ret; +} + +asm_prog_t triv_allocate(asm_prog_t p) +{ + asm_func_t *funs = malloc(sizeof(asm_func_t) * p.num_funcs); + + for(size_t i = 0; i < p.num_funcs; ++i) + { + funs[i] = allocate(&p, i); + } + return (asm_prog_t) + {.funcs = funs, + .num_funcs = p.num_funcs}; +} + +#else + +asm_prog_t triv_allocate(asm_prog_t p) +{ + fprintf(stderr, "arch not supported"); + exit(1); +} + +#endif diff --git a/fastbril/src/asm/trivial-regalloc.h b/fastbril/src/asm/trivial-regalloc.h new file mode 100644 index 000000000..b06c5eb95 --- /dev/null +++ 
b/fastbril/src/asm/trivial-regalloc.h
@@ -0,0 +1,7 @@
+#ifndef TRIVREG_H
+#define TRIVREG_H
+#include "asm.h"
+
+asm_prog_t triv_allocate(asm_prog_t prog);
+
+#endif
diff --git a/fastbril/src/bril-insns/base.h b/fastbril/src/bril-insns/base.h
new file mode 100644
index 000000000..0e487e6f8
--- /dev/null
+++ b/fastbril/src/bril-insns/base.h
@@ -0,0 +1,21 @@
+#define CONST  1
+#define ADD    2
+#define MUL    3
+#define SUB    4
+#define DIV    5
+#define EQ     6
+#define LT     7
+#define GT     8
+#define LE     9
+#define GE     10
+#define NOT    11
+#define AND    12
+#define OR     13
+#define JMP    14
+#define BR     15
+#define CALL   16
+#define RET    17
+#define PRINT  18
+#define LCONST 19
+#define NOP    20
+#define ID     21
diff --git a/fastbril/src/bril-insns/float.h b/fastbril/src/bril-insns/float.h
new file mode 100644
index 000000000..076e438f7
--- /dev/null
+++ b/fastbril/src/bril-insns/float.h
@@ -0,0 +1,9 @@
+#define FADD 28
+#define FMUL 29
+#define FSUB 30
+#define FDIV 31
+#define FEQ 32
+#define FLT 33
+#define FLE 34
+#define FGT 35
+#define FGE 36
diff --git a/fastbril/src/bril-insns/instrs.c b/fastbril/src/bril-insns/instrs.c
new file mode 100644
index 000000000..4aef0bf1e
--- /dev/null
+++ b/fastbril/src/bril-insns/instrs.c
@@ -0,0 +1,106 @@
+#include "instrs.h"
+#include "stdio.h"
+#include "string.h"
+
+/* opcode: the low 15 bits of the instruction's first 16-bit word */
+inline uint16_t get_opcode(const instruction_t i)
+{
+  return i.norm_insn.opcode_lbled & 0x7fff;
+}
+/* label flag: the top bit of the instruction's first 16-bit word */
+inline bool is_labelled(const instruction_t i)
+{
+  return i.norm_insn.opcode_lbled & 0x8000;
+}
+
+/**
+ * free a program together with each function's name, instruction
+ * and argument-type buffers. a NULL program is ignored.
+ */
+void free_program(program_t *prog)
+{
+  if (!prog)
+    return;
+  for (size_t i = 0; i < prog->num_funcs; ++i)
+  {
+    free(prog->funcs[i].name);
+    free(prog->funcs[i].insns);
+    free(prog->funcs[i].arg_types);
+  }
+  free(prog);
+}
+
+char type_to_char[5] = {'i', 'b', 'f', 'p', 'v'};
+
+#define TEST_OP(s, o) \
+  if (o == op)        \
+  {                   \
+    return s;         \
+  }
+/**
+ * this COULD be an array lookup, but this option gives more flexibility,
+ * and the performance hit is only in parsing.
+ * opcodes without an entry below (LCONST included) map to "".
+ */
+char *opcode_to_string(uint16_t op)
+{
+  TEST_OP("nop", NOP);
+  TEST_OP("const", CONST);
+  TEST_OP("add", ADD);
+  TEST_OP("mul", MUL);
+  TEST_OP("sub", SUB);
+  TEST_OP("div", DIV);
+  TEST_OP("eq", EQ);
+  TEST_OP("lt", LT);
+  TEST_OP("gt", GT);
+  TEST_OP("le", LE);
+  TEST_OP("ge", GE);
+  TEST_OP("not", NOT);
+  TEST_OP("and", AND);
+  TEST_OP("or", OR);
+  TEST_OP("jmp", JMP);
+  TEST_OP("br", BR);
+  TEST_OP("call", CALL);
+  TEST_OP("ret", RET);
+  TEST_OP("print", PRINT);
+  TEST_OP("phi", PHI);
+  TEST_OP("alloc", ALLOC);
+  TEST_OP("free", FREE);
+  TEST_OP("store", STORE);
+  TEST_OP("load", LOAD);
+  TEST_OP("ptradd", PTRADD);
+  TEST_OP("fadd", FADD);
+  TEST_OP("fmul", FMUL);
+  TEST_OP("fsub", FSUB);
+  TEST_OP("fdiv", FDIV);
+  TEST_OP("feq", FEQ);
+  TEST_OP("flt", FLT);
+  TEST_OP("fle", FLE);
+  TEST_OP("fgt", FGT);
+  TEST_OP("fge", FGE);
+  TEST_OP("id", ID);
+  return "";
+}
+
+/* the type word with its two low bits shifted out */
+uint16_t ptr_depth(briltp tp) { return tp >> 2; }
+
+/* the two low bits of the type word (0x3 is the portable spelling of 0b11) */
+uint16_t base_type(briltp tp) { return tp & 0x3; }
+
+/**
+ * argument types of the function named "main", or 0 (after printing a
+ * diagnostic) when the program has no such function.
+ */
+briltp *get_main_types(program_t *prog)
+{
+  for (size_t i = 0; i < prog->num_funcs; ++i)
+  {
+    if (strcmp(prog->funcs[i].name, "main") == 0)
+    {
+      return prog->funcs[i].arg_types;
+    }
+  }
+  fprintf(stderr, "no main function found! exiting.\n");
+  return 0;
+}
diff --git a/fastbril/src/bril-insns/instrs.h b/fastbril/src/bril-insns/instrs.h
new file mode 100644
index 000000000..0acb92b9a
--- /dev/null
+++ b/fastbril/src/bril-insns/instrs.h
@@ -0,0 +1,158 @@
+#ifndef INSTRS_H
+#define INSTRS_H
+#include <stdint.h> /* NOTE(review): the three <...> header names were stripped from the copy under review; restored from usage (uintN_t, free/size_t, bool) */
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "base.h"
+#include "float.h"
+#include "mem.h"
+#include "ssa.h"
+#include "types.h"
+/**
+ * see the documentation for instruction layouts
+ */
+
+typedef struct norm_instruction
+{
+  uint16_t opcode_lbled; /* bit 15: label flag, bits 0-14: opcode */
+  uint16_t dest;
+  uint16_t arg1;
+  uint16_t arg2;
+} norm_instruction_t;
+
+typedef struct br_inst
+{
+  uint16_t opcode_lbled;
+  uint16_t test;
+  uint16_t ltrue;
+  uint16_t lfalse;
+} br_inst_t;
+
+typedef struct call_inst
+{
+  uint16_t opcode_lbled;
+  uint16_t dest;
+  uint16_t num_args;
+  uint16_t target;
+} call_inst_t;
+
+typedef struct call_args
+{
+  uint16_t args[4];
+} call_args_t;
+
+typedef struct phi_inst
+{
+  uint16_t opcode_lbled;
+  uint16_t dest;
+  uint16_t num_choices;
+  uint16_t __unused; /* NOTE(review): reserved identifier; may clash with a system macro on some platforms */
+} phi_inst_t;
+
+typedef struct phi_extension
+{
+  uint16_t lbl1;
+  uint16_t val1;
+  uint16_t lbl2;
+  uint16_t val2;
+} phi_extension_t;
+
+typedef struct const_instr
+{
+  uint16_t opcode_lbled;
+  uint16_t dest;
+  int32_t value;
+} const_instr_t;
+
+typedef struct long_const_instr
+{
+  uint16_t opcode_lbled;
+  uint16_t dest;
+  uint16_t type;
+  uint16_t __unused;
+} long_const_instr_t;
+
+typedef union const_extn
+{
+  int64_t int_val;
+  double float_val;
+} const_extn_t;
+
+typedef struct print_instr
+{
+  uint16_t opcode_lbled;
+  uint16_t num_prints;
+  uint16_t type1;
+  uint16_t arg1;
+} print_instr_t;
+
+typedef struct print_args
+{
+  uint16_t type1;
+  uint16_t arg1;
+  uint16_t type2;
+  uint16_t arg2;
+} print_args_t;
+
+typedef union instruction
+{
+  norm_instruction_t norm_insn;
+  br_inst_t br_inst;
+  phi_inst_t phi_inst;
+  phi_extension_t phi_ext;
+  const_instr_t const_insn;
+  long_const_instr_t long_const_insn;
+  const_extn_t const_ext;
+  print_instr_t print_insn;
+  print_args_t print_args;
+  call_inst_t call_inst;
+  call_args_t call_args;
+} instruction_t;
+
+
+
+typedef uint16_t briltp;
+
+typedef struct function
+{
+  char *name;
+  size_t num_args;
+  briltp *arg_types;
+  briltp ret_tp;
+  size_t num_insns;
+  size_t num_tmps;
+  instruction_t *insns;
+} function_t;
+
+
+typedef struct program
+{
+  size_t num_funcs;
+  function_t funcs[];
+} program_t;
+
+void free_program(program_t *prog);
+
+/**
+ * convenience functions for bit fiddling
+ */
+uint16_t get_opcode(const instruction_t);
+bool is_labelled(const instruction_t i);
+
+/**
+ * take an encoded opcode and go back to the original string representation
+ */
+char *opcode_to_string(uint16_t);
+
+
+uint16_t ptr_depth(briltp);
+uint16_t base_type(briltp);
+
+briltp *get_main_types(program_t *prog);
+
+extern char type_to_char[];
+
+
+
+#endif
diff --git a/fastbril/src/bril-insns/mem.h b/fastbril/src/bril-insns/mem.h
new file mode 100644
index 000000000..d65bb6089
--- /dev/null
+++ b/fastbril/src/bril-insns/mem.h
@@ -0,0 +1,5 @@
+#define ALLOC  23
+#define FREE   24
+#define STORE  25
+#define LOAD   26
+#define PTRADD 27
diff --git a/fastbril/src/bril-insns/ssa.h b/fastbril/src/bril-insns/ssa.h
new file mode 100644
index 000000000..7c38d2a90
--- /dev/null
+++ b/fastbril/src/bril-insns/ssa.h
@@ -0,0 +1 @@
+#define PHI 22
diff --git a/fastbril/src/bril-insns/types.h b/fastbril/src/bril-insns/types.h
new file mode 100644
index 000000000..6ffd0adc3
--- /dev/null
+++ b/fastbril/src/bril-insns/types.h
@@ -0,0 +1,4 @@
+#define BRILINT   0
+#define BRILBOOL  1
+#define BRILFLOAT 2
+#define BRILVOID  3
diff --git a/fastbril/src/byte-io.c b/fastbril/src/byte-io.c
new file mode 100644
index 000000000..2eb6fc1df
--- /dev/null
+++ b/fastbril/src/byte-io.c
@@ -0,0 +1,95 @@
+#define _GNU_SOURCE
+#include "byte-io.h"
+#include <stdint.h>
+#include <stdlib.h> /* NOTE(review): this header's name was stripped from the copy under review; restored from usage */
+
+/* print why the bytecode cannot be read and stop, like the other fatal paths */
+static void die(const char *why)
+{
+  fprintf(stderr, "byte-io: %s. exiting.\n", why);
+  exit(1);
+}
+
+/* read exactly count items of size bytes each, or stop */
+static void read_items(void *buf, size_t size, size_t count, FILE *source)
+{
+  if(fread(buf, size, count, source) != count)
+    die("bytecode ended early");
+}
+
+/* allocate room for count items of size bytes (never zero bytes), or stop */
+static void *alloc_items(size_t count, size_t size)
+{
+  if(size != 0 && count > SIZE_MAX / size)
+    die("bytecode declares an impossible size");
+  size_t bytes = count * size;
+  void *mem = malloc(bytes ? bytes : 1);
+  if(!mem)
+    die("out of memory");
+  return mem;
+}
+
+/**
+ * write one function: its name followed by a newline, then the raw
+ * num_args, arg_types, ret_tp, num_insns, num_tmps and insns fields
+ */
+void output_function(function_t *func, FILE *dest)
+{
+  fprintf(dest, "%s\n", func->name);
+  fwrite(&func->num_args, sizeof(size_t), 1, dest);
+  fwrite(func->arg_types, sizeof(briltp), func->num_args, dest);
+  fwrite(&func->ret_tp, sizeof(briltp), 1, dest);
+  fwrite(&func->num_insns, sizeof(size_t), 1, dest);
+  fwrite(&func->num_tmps, sizeof(size_t), 1, dest);
+  fwrite(func->insns, sizeof(instruction_t), func->num_insns, dest);
+}
+
+void output_program(program_t *prog, FILE *dest)
+{
+  fwrite(&prog->num_funcs, sizeof(size_t), 1, dest);
+  for(size_t i = 0; i < prog->num_funcs; ++i)
+    {
+      output_function(&prog->funcs[i], dest);
+    }
+}
+
+/**
+ * read one function in the layout output_function writes.
+ * a short or unreadable stream stops the process instead of leaving
+ * dest half filled.
+ */
+void read_function(function_t *dest, FILE *source)
+{
+  char *name = 0;
+  size_t cap = 0;
+  /* getline returns -1 on failure, so keep its result signed */
+  ssize_t len = getline(&name, &cap, source);
+  if(len < 0)
+    die("missing function name");
+  /* drop the newline output_function writes after the name */
+  if(len > 0 && name[len - 1] == '\n')
+    name[len - 1] = 0;
+  dest->name = name;
+  read_items(&dest->num_args, sizeof(size_t), 1, source);
+  dest->arg_types = alloc_items(dest->num_args, sizeof(briltp));
+  read_items(dest->arg_types, sizeof(briltp), dest->num_args, source);
+  read_items(&dest->ret_tp, sizeof(briltp), 1, source);
+  read_items(&dest->num_insns, sizeof(size_t), 1, source);
+  read_items(&dest->num_tmps, sizeof(size_t), 1, source);
+  dest->insns = alloc_items(dest->num_insns, sizeof(instruction_t));
+  read_items(dest->insns, sizeof(instruction_t), dest->num_insns, source);
+}
+
+
+program_t *read_program(FILE *source)
+{
+  size_t num_funcs;
+  read_items(&num_funcs, sizeof(size_t), 1, source);
+  if(num_funcs > (SIZE_MAX - sizeof(program_t)) / sizeof(function_t))
+    die("bytecode declares an impossible size");
+  program_t *prog = alloc_items(1, sizeof(program_t) + sizeof(function_t) * num_funcs);
+  prog->num_funcs = num_funcs;
+  for(size_t i = 0; i < num_funcs; ++i)
+    read_function(prog->funcs + i, source);
+  return prog;
+}
diff --git a/fastbril/src/byte-io.h b/fastbril/src/byte-io.h
new file mode 100644
index 000000000..47902e9ff
--- /dev/null
+++ b/fastbril/src/byte-io.h
@@ -0,0 +1,17 @@
+#ifndef BYTE_OUTPUT_H
+#define BYTE_OUTPUT_H
+#include "bril-insns/instrs.h"
+#include <stdio.h> /* NOTE(review): header name was stripped from the copy under review; FILE is used below */
+
+/**
+ * emit prog as bytecode to dest
+ */
+void output_program(program_t *prog, FILE *dest);
+
+/**
+ * read source as bytecode and return a program.
+ * stops the process if the bytecode is cut short or memory runs out
+ */
+program_t *read_program(FILE *source);
+
+#endif
diff --git a/fastbril/src/interp/interp.c b/fastbril/src/interp/interp.c
new file mode 100644
index 000000000..00f82841d
--- /dev/null
+++ b/fastbril/src/interp/interp.c
@@ -0,0 +1,290 @@
+#include "interp.h"
+#include <inttypes.h>
+#include <stdio.h> /* NOTE(review): the original <...> header names were stripped from the copy under review; this list is restored from usage */
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * run function which_fun starting at instruction index which_insn.
+ * context holds one value per temporary; labels[0]/labels[1] track the
+ * two most recently entered labelled instructions (consulted by phi).
+ * *dyn_insns is incremented once per executed instruction.
+ * returns the value of the ret that ends execution (all bits set for a
+ * void ret), or 0 when control runs off the end of the function.
+ */
+value_t interpret_insn(program_t *prog, size_t which_fun,
+                       value_t *context, uint16_t *labels,
+                       size_t *dyn_insns, size_t which_insn)
+{
+  while(which_insn < prog->funcs[which_fun].num_insns)
+    {
+      instruction_t *i = prog->funcs[which_fun].insns + which_insn;
+      size_t next_insn = which_insn + 1;
+      ++*dyn_insns;
+      if(is_labelled(*i))
+        {
+          /* labels[0]: most recent labelled instruction, labels[1]: the one before */
+          labels[1] = labels[0];
+          labels[0] = which_insn;
+        }
+      switch(get_opcode(*i))
+        {
+        case CONST:
+          context[i->const_insn.dest] = (value_t) {.int_val = i->const_insn.value};
+          break;
+        case ADD:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val +
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case MUL:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val *
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case SUB:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val -
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case DIV:
+          if(context[i->norm_insn.arg2].int_val == 0)
+            {
+              fprintf(stderr, "Divide by 0. Exiting\n");
+              exit(1);
+            }
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val /
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case EQ:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val ==
+             context[i->norm_insn.arg2].int_val ? 1 : 0};
+          break;
+        case LT:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val <
+             context[i->norm_insn.arg2].int_val ? 1 : 0};
+          break;
+        case GT:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val >
+             context[i->norm_insn.arg2].int_val ? 1 : 0};
+          break;
+        case LE:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val <=
+             context[i->norm_insn.arg2].int_val ? 1 : 0};
+          break;
+        case GE:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val >=
+             context[i->norm_insn.arg2].int_val ? 1 : 0};
+          break;
+        case NOT:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = 1 ^ context[i->norm_insn.arg1].int_val};
+          break;
+        case AND:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val &
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case OR:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].int_val |
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case JMP:
+          next_insn = i->norm_insn.dest;
+          break;
+        case BR:
+          next_insn = context[i->br_inst.test].int_val == 1
+            ? i->br_inst.ltrue : i->br_inst.lfalse;
+          break;
+        case CALL:
+          {
+            /* a zero-length VLA is undefined, so keep at least one slot */
+            value_t args[i->call_inst.num_args ? i->call_inst.num_args : 1];
+            for(size_t a = 0; a < i->call_inst.num_args; ++a)
+              args[a] = context[prog->funcs[which_fun].insns[which_insn + 1 + a / 4]
+                                .call_args.args[a % 4]];
+            next_insn += (i->call_inst.num_args + 3) / 4;
+            value_t tmp = interp_fun(prog, dyn_insns, i->call_inst.target,
+                                     args, i->call_inst.num_args);
+            if(i->call_inst.dest != 0xffff)
+              context[i->call_inst.dest] = tmp;
+            break;
+          }
+        case RET:
+          if(i->norm_insn.arg1 == 0xffff)
+            return (value_t) {.int_val = 0xffffffffffffffff};
+          else
+            return context[i->norm_insn.arg1];
+        case PRINT:
+          {
+            uint16_t *args = (uint16_t*) &(i->print_insn.type1);
+            for(size_t a = 0; a < i->print_insn.num_prints; ++a)
+              {
+                if(a != 0)
+                  printf(" ");
+                switch(args[2 * a])
+                  {
+                  case BRILBOOL:
+                    printf("%s", context[args[2 * a + 1]].int_val ? "true" : "false");
+                    break;
+                  case BRILINT:
+                    printf("%" PRId64, context[args[2 * a + 1]].int_val);
+                    break;
+                  case BRILFLOAT:
+                    printf("%.17g", context[args[2 * a + 1]].float_val);
+                    break;
+                  default:
+                    fprintf(stderr, "unrecognized type: %d. exiting.\n", args[2 * a]);
+                    exit(1);
+                  }
+              }
+            printf("\n");
+            /* the first pair shares this word; each extra word carries two more */
+            next_insn += (i->print_insn.num_prints) / 2;
+            break;
+          }
+        case LCONST:
+          /* the 64-bit value is stored in the next instruction slot */
+          context[i->long_const_insn.dest] =
+            *((value_t*) &prog->funcs[which_fun].insns[which_insn + 1]);
+          ++next_insn;
+          break;
+        case NOP:
+          break;
+        case ID:
+          context[i->norm_insn.dest] = context[i->norm_insn.arg1];
+          break;
+        case ALLOC:
+          context[i->norm_insn.dest] = (value_t)
+            {.ptr_val = malloc(sizeof(value_t) * context[i->norm_insn.arg1].int_val)};
+          break;
+        case FREE:
+          free(context[i->norm_insn.arg1].ptr_val);
+          break;
+        case STORE:
+          *(context[i->norm_insn.arg1].ptr_val) = context[i->norm_insn.arg2];
+          break;
+        case LOAD:
+          context[i->norm_insn.dest] = *(context[i->norm_insn.arg1].ptr_val);
+          break;
+        case PTRADD:
+          context[i->norm_insn.dest] = (value_t)
+            {.ptr_val = context[i->norm_insn.arg1].ptr_val +
+             context[i->norm_insn.arg2].int_val};
+          break;
+        case PHI:
+          {
+            size_t num_choices = i->phi_inst.num_choices;
+            next_insn += (num_choices + 1) / 2;
+            /* pick the value paired with the label we arrived from */
+            for(size_t a = 0; a < num_choices; ++a)
+              {
+                if (labels[1] ==
+                    ((uint16_t*) (&prog->funcs[which_fun]
+                                  .insns[which_insn + 1 + a/2].phi_ext))[(a % 2) * 2])
+                  context[i->phi_inst.dest] =
+                    context[
+                            ((uint16_t*) (&prog->funcs[which_fun]
+                                          .insns[which_insn + 1 + a/2]
+                                          .phi_ext))[(a % 2) * 2 + 1]];
+              }
+            break;
+          }
+        case FADD:
+          context[i->norm_insn.dest] = (value_t)
+            {.float_val = context[i->norm_insn.arg1].float_val +
+             context[i->norm_insn.arg2].float_val};
+          break;
+        case FMUL:
+          context[i->norm_insn.dest] = (value_t)
+            {.float_val = context[i->norm_insn.arg1].float_val *
+             context[i->norm_insn.arg2].float_val};
+          break;
+        case FSUB:
+          context[i->norm_insn.dest] = (value_t)
+            {.float_val = context[i->norm_insn.arg1].float_val -
+             context[i->norm_insn.arg2].float_val};
+          break;
+        case FDIV:
+          context[i->norm_insn.dest] = (value_t)
+            {.float_val = context[i->norm_insn.arg1].float_val /
+             context[i->norm_insn.arg2].float_val};
+          break;
+        case FEQ:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].float_val ==
+             context[i->norm_insn.arg2].float_val ? 1 : 0};
+          break;
+        case FLT:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].float_val <
+             context[i->norm_insn.arg2].float_val ? 1 : 0};
+          break;
+        case FLE:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].float_val <=
+             context[i->norm_insn.arg2].float_val ? 1 : 0};
+          break;
+        case FGT:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].float_val >
+             context[i->norm_insn.arg2].float_val ? 1 : 0};
+          break;
+        case FGE:
+          context[i->norm_insn.dest] = (value_t)
+            {.int_val = context[i->norm_insn.arg1].float_val >=
+             context[i->norm_insn.arg2].float_val ? 1 : 0};
+          break;
+        default:
+          fprintf(stderr, "unrecognized opcode: %d, exiting.\n", get_opcode(*i));
+          exit(1);
+        }
+      which_insn = next_insn;
+    }
+  return (value_t) {.int_val = 0};
+}
+
+
+void interp_main(program_t *prog, value_t *args, size_t num_args, bool count_insns)
+{
+  for(size_t i = 0; i < prog->num_funcs; ++i)
+    {
+      if(strcmp(prog->funcs[i].name, "main") == 0)
+        {
+          size_t dyn = 0;
+          interp_fun(prog, &dyn, i, args, num_args);
+          if(count_insns)
+            fprintf(stderr, "total_dyn_inst: %zu\n", dyn);
+          return;
+        }
+    }
+  fprintf(stderr, "no main function found! exiting.\n");
+}
+
+
+value_t interp_fun(program_t *prog, size_t *dyn_insns,
+                   size_t which_fun, value_t *args, size_t num_args)
+{
+  function_t *f = &prog->funcs[which_fun];
+  /* the arguments fill the first num_args slots, so the frame must hold them */
+  size_t slots = f->num_tmps > num_args ? f->num_tmps : num_args;
+  value_t *context = malloc(sizeof(value_t) * (slots ? slots : 1));
+  if(!context)
+    {
+      fprintf(stderr, "out of memory. exiting.\n");
+      exit(1);
+    }
+  uint16_t labels[] = {0, 0};
+  if(num_args)
+    memcpy(context, args, sizeof(value_t) * num_args);
+  value_t tmp = interpret_insn(prog, which_fun, context, labels, dyn_insns, 0);
+  free(context);
+  return tmp;
+}
diff --git a/fastbril/src/interp/interp.h b/fastbril/src/interp/interp.h
new file mode 100644
index 000000000..64a61c5ef
--- /dev/null
+++ b/fastbril/src/interp/interp.h
@@ -0,0 +1,32 @@
+#ifndef INTERP_H
+#define INTERP_H
+#include <stdbool.h> /* NOTE(review): header name was stripped from the copy under review; bool is used below - confirm */
+#include "../bril-insns/instrs.h"
+
+/**
+ * internal representation of bril types.
+ * we represent bools as ints, which is why they aren't here.
+ * this has the convenience of all types being the same size: 64 bits
+ */
+typedef union value
+{
+  int64_t int_val;
+  double float_val;
+  union value* ptr_val;
+} value_t;
+
+/**
+ * interpret one function with args.
+ * dyn_insns is a pointer to a value which contains the number of
+ * instructions executed so far.
+ */
+value_t interp_fun(program_t *prog, size_t *dyn_insns,
+                   size_t which_fun, value_t *args, size_t num_args);
+
+/**
+ * interpret the main function of prog. count_insns -> we keep track of dynamic
+ * instruction count
+ */
+void interp_main(program_t *prog, value_t *args, size_t num_args, bool count_insns);
+
+#endif
diff --git a/fastbril/src/libs/hashmap.c b/fastbril/src/libs/hashmap.c
new file mode 100644
index 000000000..b05231efc
--- /dev/null
+++ b/fastbril/src/libs/hashmap.c
@@ -0,0 +1,936 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+// NOTE(review): the five <...> header names were stripped from the copy under
+// review; they are restored here from what this file uses (fprintf, memcpy,
+// malloc/exit, uint64_t, size_t).
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include "hashmap.h"
+
+// Process-wide allocator overrides; NULL means "use the libc default".
+static void *(*_malloc)(size_t) = NULL;
+static void *(*_realloc)(void *, size_t) = NULL;
+static void (*_free)(void *) = NULL;
+
+// hashmap_set_allocator allows for configuring a custom allocator for
+// all hashmap library operations. This function, if needed, should be called
+// only once at startup and prior to calling hashmap_new().
+// Only malloc and free can be replaced here; the realloc override is left
+// untouched.
+void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*))
+{
+    _malloc = malloc;
+    _free = free;
+}
+
+// panic reports a fatal misuse and terminates the process. It is wrapped in
+// do/while(0) so it expands to exactly one statement wherever it is used.
+#define panic(_msg_) do { \
+    fprintf(stderr, "panic: %s (%s:%d)\n", (_msg_), __FILE__, __LINE__); \
+    exit(1); \
+} while (0)
+
+// Header stored in front of every element.
+struct bucket {
+    uint64_t hash:48;   // low 48 bits of the element's hash
+    uint64_t dib:16;    // 0 marks an empty bucket; set to 1 on insertion and
+                        // incremented for every probe step
+};
+
+// hashmap is an open addressed hash map using robinhood hashing.
+struct hashmap {
+    void *(*malloc)(size_t);
+    void *(*realloc)(void *, size_t);
+    void (*free)(void *);
+    bool oom;
+    size_t elsize;
+    size_t cap;
+    uint64_t seed0;
+    uint64_t seed1;
+    uint64_t (*hash)(const void *item, uint64_t seed0, uint64_t seed1);
+    int (*compare)(const void *a, const void *b, void *udata);
+    void (*elfree)(void *item);
+    void *udata;
+    size_t bucketsz;
+    size_t nbuckets;
+    size_t count;
+    size_t mask;
+    size_t growat;
+    size_t shrinkat;
+    void *buckets;
+    void *spare;
+    void *edata;
+};
+
+// bucket_at returns the bucket header at `index` in the bucket array.
+static struct bucket *bucket_at(struct hashmap *map, size_t index) {
+    return (struct bucket*)(((char*)map->buckets)+(map->bucketsz*index));
+}
+
+// bucket_item returns the element stored right after a bucket header.
+static void *bucket_item(struct bucket *entry) {
+    return ((char*)entry)+sizeof(struct bucket);
+}
+
+// get_hash hashes `key` with the map's seeds and keeps the low 48 bits, the
+// width of bucket.hash.
+static uint64_t get_hash(struct hashmap *map, const void *key) {
+    return map->hash(key, map->seed0, map->seed1) << 16 >> 16;
+}
+
+// hashmap_new_with_allocator returns a new hash map using a custom allocator.
+// See hashmap_new for more information.
+// A NULL allocator callback selects the matching libc function. Returns NULL
+// when `cap` is too large to round up or to size the bucket array, or when
+// an allocation fails.
+struct hashmap *hashmap_new_with_allocator(
+                            void *(*_malloc)(size_t),
+                            void *(*_realloc)(void*, size_t),
+                            void (*_free)(void*),
+                            size_t elsize, size_t cap,
+                            uint64_t seed0, uint64_t seed1,
+                            uint64_t (*hash)(const void *item,
+                                             uint64_t seed0, uint64_t seed1),
+                            int (*compare)(const void *a, const void *b,
+                                           void *udata),
+                            void (*elfree)(void *item),
+                            void *udata)
+{
+    _malloc = _malloc ? _malloc : malloc;
+    _realloc = _realloc ? _realloc : realloc;
+    _free = _free ? _free : free;
+    // Round the capacity up to a power of two, 16 at minimum. The counter has
+    // the same type as `cap` so the comparison is not mixed-sign, and a `cap`
+    // with no representable power of two is rejected instead of wrapping.
+    size_t ncap = 16;
+    if (cap < ncap) {
+        cap = ncap;
+    } else {
+        while (ncap < cap) {
+            if (ncap > SIZE_MAX/2) {
+                return NULL;
+            }
+            ncap *= 2;
+        }
+        cap = ncap;
+    }
+    // Pad each bucket to a multiple of the pointer size.
+    size_t bucketsz = sizeof(struct bucket) + elsize;
+    while (bucketsz & (sizeof(uintptr_t)-1)) {
+        bucketsz++;
+    }
+    // The bucket array takes bucketsz*cap bytes; refuse sizes that would wrap.
+    if (cap > SIZE_MAX/bucketsz) {
+        return NULL;
+    }
+    // hashmap + spare + edata
+    size_t size = sizeof(struct hashmap)+bucketsz*2;
+    struct hashmap *map = _malloc(size);
+    if (!map) {
+        return NULL;
+    }
+    memset(map, 0, sizeof(struct hashmap));
+    map->elsize = elsize;
+    map->bucketsz = bucketsz;
+    map->seed0 = seed0;
+    map->seed1 = seed1;
+    map->hash = hash;
+    map->compare = compare;
+    map->elfree = elfree;
+    map->udata = udata;
+    // The two scratch buckets live directly behind the struct.
+    map->spare = ((char*)map)+sizeof(struct hashmap);
+    map->edata = (char*)map->spare+bucketsz;
+    map->cap = cap;
+    map->nbuckets = cap;
+    map->mask = map->nbuckets-1;
+    map->buckets = _malloc(map->bucketsz*map->nbuckets);
+    if (!map->buckets) {
+        _free(map);
+        return NULL;
+    }
+    memset(map->buckets, 0, map->bucketsz*map->nbuckets);
+    map->growat = map->nbuckets*0.75;
+    map->shrinkat = map->nbuckets*0.10;
+    map->malloc = _malloc;
+    map->realloc = _realloc;
+    map->free = _free;
+    return map;
+}
+
+
+// hashmap_new returns a new hash map.
+// Param `elsize` is the size of each element in the map. Every element that
+// is inserted, deleted, or retrieved will be this size.
+// Param `cap` is the default lower capacity of the hashmap. Setting this to
+// zero will default to 16.
+// Params `seed0` and `seed1` are optional seed values that are passed to the
+// following `hash` function. These can be any value you wish but it's often
+// best to use randomly generated values.
+// Param `hash` is a function that generates a hash value for an item. It's
+// important that you provide a good hash function, otherwise it will perform
+// poorly or be vulnerable to Denial-of-service attacks. This implementation
+// comes with two helper functions `hashmap_sip()` and `hashmap_murmur()`.
+// Param `compare` is a function that compares items in the map. See the
+// qsort stdlib function for an example of how this function works.
+// The hashmap must be freed with hashmap_free().
+// Param `elfree` is a function that frees a specific item. This should be NULL
+// unless you're storing some kind of reference data in the hash.
+struct hashmap *hashmap_new(size_t elsize, size_t cap,
+                            uint64_t seed0, uint64_t seed1,
+                            uint64_t (*hash)(const void *item,
+                                             uint64_t seed0, uint64_t seed1),
+                            int (*compare)(const void *a, const void *b,
+                                           void *udata),
+                            void (*elfree)(void *item),
+                            void *udata)
+{
+    // Fall back to libc for every allocator hook that was never overridden.
+    void *(*alloc_fn)(size_t) = _malloc ? _malloc : malloc;
+    void *(*realloc_fn)(void *, size_t) = _realloc ? _realloc : realloc;
+    void (*free_fn)(void *) = _free ? _free : free;
+    return hashmap_new_with_allocator(alloc_fn, realloc_fn, free_fn,
+                                      elsize, cap, seed0, seed1,
+                                      hash, compare, elfree, udata);
+}
+
+// free_elements hands every stored element to the map's `elfree` callback;
+// it does nothing when no callback was supplied.
+static void free_elements(struct hashmap *map) {
+    if (!map->elfree) {
+        return;
+    }
+    for (size_t idx = 0; idx < map->nbuckets; idx++) {
+        struct bucket *slot = bucket_at(map, idx);
+        if (slot->dib) {
+            map->elfree(bucket_item(slot));
+        }
+    }
+}
+
+
+// hashmap_clear quickly clears the map.
+// Every item is called with the element-freeing function given in hashmap_new,
+// if present, to free any data referenced in the elements of the hashmap.
+// When the update_cap is provided, the map's capacity will be updated to match
+// the current number of allocated buckets. This is an optimization to ensure
+// that this operation does not perform any allocations.
+void hashmap_clear(struct hashmap *map, bool update_cap) {
+    map->count = 0;
+    free_elements(map);
+    if (update_cap) {
+        map->cap = map->nbuckets;
+    } else if (map->nbuckets != map->cap) {
+        // Swap in an array of the configured capacity. If the allocation
+        // fails the existing array is kept and only its first `cap` buckets
+        // are used from here on.
+        void *new_buckets = map->malloc(map->bucketsz*map->cap);
+        if (new_buckets) {
+            map->free(map->buckets);
+            map->buckets = new_buckets;
+        }
+        map->nbuckets = map->cap;
+    }
+    memset(map->buckets, 0, map->bucketsz*map->nbuckets);
+    map->mask = map->nbuckets-1;
+    map->growat = map->nbuckets*0.75;
+    map->shrinkat = map->nbuckets*0.10;
+}
+
+
+// resize rehashes every element into a bucket array sized for `new_cap` and
+// adopts that array. Returns false, leaving the map untouched, when the new
+// array cannot be allocated.
+static bool resize(struct hashmap *map, size_t new_cap) {
+    // Build the replacement with this map's own allocator and both seeds:
+    // its storage is released through map->free below, so it must not come
+    // from the process-wide allocator that hashmap_new() would pick.
+    struct hashmap *map2 = hashmap_new_with_allocator(
+        map->malloc, map->realloc, map->free,
+        map->elsize, new_cap, map->seed0, map->seed1,
+        map->hash, map->compare, map->elfree, map->udata);
+    if (!map2) {
+        return false;
+    }
+    for (size_t i = 0; i < map->nbuckets; i++) {
+        struct bucket *entry = bucket_at(map, i);
+        if (!entry->dib) {
+            continue;
+        }
+        // Re-insert using the stored hash; the probe distance starts over.
+        entry->dib = 1;
+        size_t j = entry->hash & map2->mask;
+        for (;;) {
+            struct bucket *bucket = bucket_at(map2, j);
+            if (bucket->dib == 0) {
+                memcpy(bucket, entry, map->bucketsz);
+                break;
+            }
+            if (bucket->dib < entry->dib) {
+                // Swap `entry` with the occupant via the scratch bucket.
+                memcpy(map2->spare, bucket, map->bucketsz);
+                memcpy(bucket, entry, map->bucketsz);
+                memcpy(entry, map2->spare, map->bucketsz);
+            }
+            j = (j + 1) & map2->mask;
+            entry->dib += 1;
+        }
+    }
+    map->free(map->buckets);
+    map->buckets = map2->buckets;
+    map->nbuckets = map2->nbuckets;
+    map->mask = map2->mask;
+    map->growat = map2->growat;
+    map->shrinkat = map2->shrinkat;
+    // Only the shell of map2 is released; its bucket array now belongs to map.
+    map->free(map2);
+    return true;
+}
+
+// hashmap_set inserts or replaces an item in the hash map. If an item is
+// replaced then it is returned otherwise NULL is returned. This operation
+// may allocate memory. If the system is unable to allocate additional
+// memory then NULL is returned and hashmap_oom() returns true.
+void *hashmap_set(struct hashmap *map, void *item) {
+    if (!item) {
+        panic("item is null");
+    }
+    map->oom = false;
+    // Grow first when the load threshold has been reached.
+    if (map->count == map->growat && !resize(map, map->nbuckets*2)) {
+        map->oom = true;
+        return NULL;
+    }
+
+    // Stage the new element in the scratch bucket `edata`; it is the bucket
+    // being carried along the probe sequence.
+    struct bucket *carry = map->edata;
+    carry->hash = get_hash(map, item);
+    carry->dib = 1;
+    memcpy(bucket_item(carry), item, map->elsize);
+
+    for (size_t pos = carry->hash & map->mask; ; pos = (pos + 1) & map->mask) {
+        struct bucket *slot = bucket_at(map, pos);
+        if (slot->dib == 0) {
+            // Empty bucket: drop the carried element here.
+            memcpy(slot, carry, map->bucketsz);
+            map->count++;
+            return NULL;
+        }
+        bool same_hash = carry->hash == slot->hash;
+        if (same_hash &&
+            map->compare(bucket_item(carry), bucket_item(slot),
+                         map->udata) == 0)
+        {
+            // Same key: hand back the old element through `spare`.
+            memcpy(map->spare, bucket_item(slot), map->elsize);
+            memcpy(bucket_item(slot), bucket_item(carry), map->elsize);
+            return map->spare;
+        }
+        if (slot->dib < carry->dib) {
+            // Swap the carried bucket with the occupant and keep probing
+            // with the one that was displaced.
+            memcpy(map->spare, slot, map->bucketsz);
+            memcpy(slot, carry, map->bucketsz);
+            memcpy(carry, map->spare, map->bucketsz);
+        }
+        carry->dib += 1;
+    }
+}
+
+// hashmap_get returns the item based on the provided key. If the item is not
+// found then NULL is returned.
+void *hashmap_get(struct hashmap *map, const void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    const uint64_t wanted = get_hash(map, key);
+    for (size_t pos = wanted & map->mask; ; pos = (pos + 1) & map->mask) {
+        struct bucket *slot = bucket_at(map, pos);
+        if (!slot->dib) {
+            // An empty bucket ends the probe sequence.
+            return NULL;
+        }
+        if (slot->hash == wanted &&
+            map->compare(key, bucket_item(slot), map->udata) == 0)
+        {
+            return bucket_item(slot);
+        }
+    }
+}
+
+// hashmap_probe returns the item in the bucket at position or NULL if an item
+// is not set for that bucket. The position is 'moduloed' by the number of
+// buckets in the hashmap.
+void *hashmap_probe(struct hashmap *map, uint64_t position) {
+    struct bucket *slot = bucket_at(map, position & map->mask);
+    return slot->dib ? bucket_item(slot) : NULL;
+}
+
+
+// hashmap_delete removes an item from the hash map and returns it. If the
+// item is not found then NULL is returned.
+void *hashmap_delete(struct hashmap *map, void *key) {
+    if (!key) {
+        panic("key is null");
+    }
+    map->oom = false;
+    const uint64_t wanted = get_hash(map, key);
+    size_t pos = wanted & map->mask;
+    struct bucket *slot = bucket_at(map, pos);
+    // Probe until either the key or an empty bucket turns up.
+    while (slot->dib) {
+        if (slot->hash == wanted &&
+            map->compare(key, bucket_item(slot), map->udata) == 0)
+        {
+            break;
+        }
+        pos = (pos + 1) & map->mask;
+        slot = bucket_at(map, pos);
+    }
+    if (!slot->dib) {
+        return NULL;
+    }
+    // Keep a copy of the removed element to hand back to the caller.
+    memcpy(map->spare, bucket_item(slot), map->elsize);
+    slot->dib = 0;
+    // Pull the following buckets one step back until an empty bucket, or one
+    // whose dib is 1, is reached.
+    while (true) {
+        struct bucket *hole = slot;
+        pos = (pos + 1) & map->mask;
+        slot = bucket_at(map, pos);
+        if (slot->dib <= 1) {
+            hole->dib = 0;
+            break;
+        }
+        memcpy(hole, slot, map->bucketsz);
+        hole->dib--;
+    }
+    map->count--;
+    if (map->nbuckets > map->cap && map->count <= map->shrinkat) {
+        // Ignore the return value. It's ok for the resize operation to
+        // fail to allocate enough memory because a shrink operation
+        // does not change the integrity of the data.
+        resize(map, map->nbuckets/2);
+    }
+    return map->spare;
+}
+
+// hashmap_count returns the number of items in the hash map.
+size_t hashmap_count(struct hashmap *map) {
+    return map->count;
+}
+
+// hashmap_free frees the hash map
+// Every item is called with the element-freeing function given in hashmap_new,
+// if present, to free any data referenced in the elements of the hashmap.
+void hashmap_free(struct hashmap *map) {
+    if (map) {
+        free_elements(map);
+        map->free(map->buckets);
+        map->free(map);
+    }
+}
+
+// hashmap_oom returns true if the last hashmap_set() call failed due to the
+// system being out of memory.
+bool hashmap_oom(struct hashmap *map) {
+    const bool out_of_memory = map->oom;
+    return out_of_memory;
+}
+
+// hashmap_scan iterates over all items in the hash map
+// Param `iter` can return false to stop iteration early.
+// Returns false if the iteration has been stopped early.
+bool hashmap_scan(struct hashmap *map,
+                  bool (*iter)(const void *item, void *udata), void *udata)
+{
+    size_t idx = 0;
+    while (idx < map->nbuckets) {
+        struct bucket *slot = bucket_at(map, idx++);
+        // Only occupied buckets are offered to the callback.
+        if (slot->dib && !iter(bucket_item(slot), udata)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// SipHash reference C implementation
+//
+// Copyright (c) 2012-2016 Jean-Philippe Aumasson
+//
+// Copyright (c) 2012-2014 Daniel J. Bernstein
+//
+// To the extent possible under law, the author(s) have dedicated all copyright
+// and related and neighboring rights to this software to the public domain
+// worldwide. This software is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this software. If not, see
//
// default: SipHash-2-4
//-----------------------------------------------------------------------------
// SIP64 computes a 64-bit SipHash-2-4 digest of the `inlen` bytes at `in`,
// keyed by `seed0` and `seed1`. Two compression rounds are applied per 8-byte
// word and four finalization rounds at the end.
static uint64_t SIP64(const uint8_t *in, const size_t inlen,
                      uint64_t seed0, uint64_t seed1)
{
// Assemble a 64-bit value from 8 bytes, least significant byte first.
// Written as a plain expression so it can be used as an initializer or
// anywhere else an expression is allowed.
#define U8TO64_LE(p) \
    (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
     ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
     ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
     ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
// Store a 64-bit / 32-bit value as bytes, least significant byte first.
#define U64TO8_LE(p, v) \
    { U32TO8_LE((p), (uint32_t)((v))); \
      U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); }
#define U32TO8_LE(p, v) \
    { (p)[0] = (uint8_t)((v)); \
      (p)[1] = (uint8_t)((v) >> 8); \
      (p)[2] = (uint8_t)((v) >> 16); \
      (p)[3] = (uint8_t)((v) >> 24); }
#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
// One SipRound over the four state words v0..v3.
#define SIPROUND \
    { v0 += v1; v1 = ROTL(v1, 13); \
      v1 ^= v0; v0 = ROTL(v0, 32); \
      v2 += v3; v3 = ROTL(v3, 16); \
      v3 ^= v2; \
      v0 += v3; v3 = ROTL(v3, 21); \
      v3 ^= v0; \
      v2 += v1; v1 = ROTL(v1, 17); \
      v1 ^= v2; v2 = ROTL(v2, 32); }
    // The key words are taken from the in-memory bytes of the two seeds.
    uint64_t k0 = U8TO64_LE((uint8_t*)&seed0);
    uint64_t k1 = U8TO64_LE((uint8_t*)&seed1);
    uint64_t v3 = UINT64_C(0x7465646279746573) ^ k1;
    uint64_t v2 = UINT64_C(0x6c7967656e657261) ^ k0;
    uint64_t v1 = UINT64_C(0x646f72616e646f6d) ^ k1;
    uint64_t v0 = UINT64_C(0x736f6d6570736575) ^ k0;

    // Compress every complete 8-byte word.
    const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
    for (; in != end; in += 8) {
        uint64_t m = U8TO64_LE(in);
        v3 ^= m;
        SIPROUND; SIPROUND;
        v0 ^= m;
    }

    // Final word: the 0..7 leftover bytes, with the low 8 bits of the total
    // length placed in the top byte. Each case deliberately falls through so
    // that `left` bytes are folded in.
    const int left = (int)(inlen & 7);
    uint64_t b = ((uint64_t)inlen) << 56;
    switch (left) {
    case 7: b |= ((uint64_t)in[6]) << 48; /* fall through */
    case 6: b |= ((uint64_t)in[5]) << 40; /* fall through */
    case 5: b |= ((uint64_t)in[4]) << 32; /* fall through */
    case 4: b |= ((uint64_t)in[3]) << 24; /* fall through */
    case 3: b |= ((uint64_t)in[2]) << 16; /* fall through */
    case 2: b |= ((uint64_t)in[1]) << 8;  /* fall through */
    case 1: b |= ((uint64_t)in[0]); break;
    case 0: break;
    }
    v3 ^= b;
    SIPROUND; SIPROUND;
    v0 ^= b;

    // Finalization: four rounds, then fold the state into one word.
    v2 ^= 0xff;
    SIPROUND; SIPROUND; SIPROUND; SIPROUND;
    b = v0 ^ v1 ^ v2 ^ v3;
    uint64_t out = 0;
    U64TO8_LE((uint8_t*)&out, b);
    return out;
}

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
//
// Murmur3_86_128
//-----------------------------------------------------------------------------

// Reads four bytes at `src` as a native-endian uint32_t. Copying bytewise
// (instead of dereferencing a cast pointer) keeps the load valid for any
// alignment of `src`.
static uint32_t MM86128_load32(const uint8_t *src) {
    uint32_t word;
    uint8_t *dst = (uint8_t *)&word;
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    return word;
}

// Writes `word` to the four bytes at `dst` in native byte order, for any
// alignment of `dst`.
static void MM86128_store32(uint8_t *dst, uint32_t word) {
    const uint8_t *src = (const uint8_t *)&word;
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
}

// MM86128 hashes the `len` bytes at `key` with the 32-bit `seed` and writes
// a 128-bit result to `out` as four consecutive native-endian 32-bit words
// (h1, h2, h3, h4). `out` must have room for 16 bytes; neither `key` nor
// `out` needs any particular alignment.
static void MM86128(const void *key, const int len, uint32_t seed, void *out) {
#define ROTL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define FMIX32(h) \
    do { (h) ^= (h) >> 16; (h) *= 0x85ebca6b; (h) ^= (h) >> 13; \
         (h) *= 0xc2b2ae35; (h) ^= (h) >> 16; } while (0)
    const uint8_t * data = (const uint8_t*)key;
    const int nblocks = len / 16;
    uint32_t h1 = seed;
    uint32_t h2 = seed;
    uint32_t h3 = seed;
    uint32_t h4 = seed;
    uint32_t c1 = 0x239b961b;
    uint32_t c2 = 0xab0e9789;
    uint32_t c3 = 0x38b34ae5;
    uint32_t c4 = 0xa1e38b93;

    // Body: mix every complete 16-byte block, in order.
    for (int i = 0; i < nblocks; i++) {
        const uint8_t *block = data + (size_t)i * 16;
        uint32_t k1 = MM86128_load32(block + 0);
        uint32_t k2 = MM86128_load32(block + 4);
        uint32_t k3 = MM86128_load32(block + 8);
        uint32_t k4 = MM86128_load32(block + 12);
        k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
        h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
        k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
        h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
        k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
        h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
        k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
        h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
    }

    // Tail: the 0..15 bytes after the last full block. Every byte is widened
    // to uint32_t before shifting; shifting the int-promoted byte left by 24
    // would overflow a signed int for bytes >= 0x80. Cases fall through on
    // purpose.
    const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;
    switch(len & 15) {
    case 15: k4 ^= (uint32_t)tail[14] << 16; /* fall through */
    case 14: k4 ^= (uint32_t)tail[13] << 8;  /* fall through */
    case 13: k4 ^= (uint32_t)tail[12] << 0;
             k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
             /* fall through */
    case 12: k3 ^= (uint32_t)tail[11] << 24; /* fall through */
    case 11: k3 ^= (uint32_t)tail[10] << 16; /* fall through */
    case 10: k3 ^= (uint32_t)tail[ 9] << 8;  /* fall through */
    case  9: k3 ^= (uint32_t)tail[ 8] << 0;
             k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
             /* fall through */
    case  8: k2 ^= (uint32_t)tail[ 7] << 24; /* fall through */
    case  7: k2 ^= (uint32_t)tail[ 6] << 16; /* fall through */
    case  6: k2 ^= (uint32_t)tail[ 5] << 8;  /* fall through */
    case  5: k2 ^= (uint32_t)tail[ 4] << 0;
             k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
             /* fall through */
    case  4: k1 ^= (uint32_t)tail[ 3] << 24; /* fall through */
    case  3: k1 ^= (uint32_t)tail[ 2] << 16; /* fall through */
    case  2: k1 ^= (uint32_t)tail[ 1] << 8;  /* fall through */
    case  1: k1 ^= (uint32_t)tail[ 0] << 0;
             k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
             break;
    default: break;
    }

    // Finalization: fold in the length, cross-mix and avalanche.
    h1 ^= (uint32_t)len; h2 ^= (uint32_t)len;
    h3 ^= (uint32_t)len; h4 ^= (uint32_t)len;
    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;
    FMIX32(h1); FMIX32(h2); FMIX32(h3); FMIX32(h4);
    h1 += h2; h1 += h3; h1 += h4;
    h2 += h1; h3 += h1; h4 += h1;

    uint8_t *dst = (uint8_t *)out;
    MM86128_store32(dst + 0, h1);
    MM86128_store32(dst + 4, h2);
    MM86128_store32(dst + 8, h3);
    MM86128_store32(dst + 12, h4);
}

// hashmap_sip returns a hash value for `data` using SipHash-2-4.
// `len` is the number of bytes at `data`; `seed0` and `seed1` key the hash.
uint64_t hashmap_sip(const void *data, size_t len,
                     uint64_t seed0, uint64_t seed1)
{
    return SIP64((const uint8_t*)data, len, seed0, seed1);
}

// hashmap_murmur returns a hash value for `data` using Murmur3_86_128.
+uint64_t hashmap_murmur(const void *data, size_t len, + uint64_t seed0, uint64_t seed1) +{ + char out[16]; + MM86128(data, len, seed0, &out); + return *(uint64_t*)out; +} + +//============================================================================== +// TESTS AND BENCHMARKS +// $ cc -DHASHMAP_TEST hashmap.c && ./a.out # run tests +// $ cc -DHASHMAP_TEST -O3 hashmap.c && BENCH=1 ./a.out # run benchmarks +//============================================================================== +#ifdef HASHMAP_TEST + +static size_t deepcount(struct hashmap *map) { + size_t count = 0; + for (size_t i = 0; i < map->nbuckets; i++) { + if (bucket_at(map, i)->dib) { + count++; + } + } + return count; +} + + +#pragma GCC diagnostic ignored "-Wextra" + + +#include +#include +#include +#include +#include +#include "hashmap.h" + +static bool rand_alloc_fail = false; +static int rand_alloc_fail_odds = 3; // 1 in 3 chance malloc will fail. +static uintptr_t total_allocs = 0; +static uintptr_t total_mem = 0; + +static void *xmalloc(size_t size) { + if (rand_alloc_fail && rand()%rand_alloc_fail_odds == 0) { + return NULL; + } + void *mem = malloc(sizeof(uintptr_t)+size); + assert(mem); + *(uintptr_t*)mem = size; + total_allocs++; + total_mem += size; + return (char*)mem+sizeof(uintptr_t); +} + +static void xfree(void *ptr) { + if (ptr) { + total_mem -= *(uintptr_t*)((char*)ptr-sizeof(uintptr_t)); + free((char*)ptr-sizeof(uintptr_t)); + total_allocs--; + } +} + +static void shuffle(void *array, size_t numels, size_t elsize) { + char tmp[elsize]; + char *arr = array; + for (size_t i = 0; i < numels - 1; i++) { + int j = i + rand() / (RAND_MAX / (numels - i) + 1); + memcpy(tmp, arr + j * elsize, elsize); + memcpy(arr + j * elsize, arr + i * elsize, elsize); + memcpy(arr + i * elsize, tmp, elsize); + } +} + +static bool iter_ints(const void *item, void *udata) { + int *vals = *(int**)udata; + vals[*(int*)item] = 1; + return true; +} + +static int compare_ints(const void *a, const void 
*b) { + return *(int*)a - *(int*)b; +} + +static int compare_ints_udata(const void *a, const void *b, void *udata) { + return *(int*)a - *(int*)b; +} + +static int compare_strs(const void *a, const void *b, void *udata) { + return strcmp(*(char**)a, *(char**)b); +} + +static uint64_t hash_int(const void *item, uint64_t seed0, uint64_t seed1) { + return hashmap_murmur(item, sizeof(int), seed0, seed1); +} + +static uint64_t hash_str(const void *item, uint64_t seed0, uint64_t seed1) { + return hashmap_murmur(*(char**)item, strlen(*(char**)item), seed0, seed1); +} + +static void free_str(void *item) { + xfree(*(char**)item); +} + +static void all() { + int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL); + int N = getenv("N")?atoi(getenv("N")):2000; + printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int)); + srand(seed); + + rand_alloc_fail = true; + + // test sip and murmur hashes + assert(hashmap_sip("hello", 5, 1, 2) == 2957200328589801622); + assert(hashmap_murmur("hello", 5, 1, 2) == 1682575153221130884); + + int *vals; + while (!(vals = xmalloc(N * sizeof(int)))) {} + for (int i = 0; i < N; i++) { + vals[i] = i; + } + + struct hashmap *map; + + while (!(map = hashmap_new(sizeof(int), 0, seed, seed, + hash_int, compare_ints_udata, NULL, NULL))) {} + shuffle(vals, N, sizeof(int)); + for (int i = 0; i < N; i++) { + // // printf("== %d ==\n", vals[i]); + assert(map->count == i); + assert(map->count == hashmap_count(map)); + assert(map->count == deepcount(map)); + int *v; + assert(!hashmap_get(map, &vals[i])); + assert(!hashmap_delete(map, &vals[i])); + while (true) { + assert(!hashmap_set(map, &vals[i])); + if (!hashmap_oom(map)) { + break; + } + } + + for (int j = 0; j < i; j++) { + v = hashmap_get(map, &vals[j]); + assert(v && *v == vals[j]); + } + while (true) { + v = hashmap_set(map, &vals[i]); + if (!v) { + assert(hashmap_oom(map)); + continue; + } else { + assert(!hashmap_oom(map)); + assert(v && *v == vals[i]); + break; + } + } + v = 
hashmap_get(map, &vals[i]); + assert(v && *v == vals[i]); + v = hashmap_delete(map, &vals[i]); + assert(v && *v == vals[i]); + assert(!hashmap_get(map, &vals[i])); + assert(!hashmap_delete(map, &vals[i])); + assert(!hashmap_set(map, &vals[i])); + assert(map->count == i+1); + assert(map->count == hashmap_count(map)); + assert(map->count == deepcount(map)); + } + + int *vals2; + while (!(vals2 = xmalloc(N * sizeof(int)))) {} + memset(vals2, 0, N * sizeof(int)); + assert(hashmap_scan(map, iter_ints, &vals2)); + for (int i = 0; i < N; i++) { + assert(vals2[i] == 1); + } + xfree(vals2); + + shuffle(vals, N, sizeof(int)); + for (int i = 0; i < N; i++) { + int *v; + v = hashmap_delete(map, &vals[i]); + assert(v && *v == vals[i]); + assert(!hashmap_get(map, &vals[i])); + assert(map->count == N-i-1); + assert(map->count == hashmap_count(map)); + assert(map->count == deepcount(map)); + for (int j = N-1; j > i; j--) { + v = hashmap_get(map, &vals[j]); + assert(v && *v == vals[j]); + } + } + + for (int i = 0; i < N; i++) { + while (true) { + assert(!hashmap_set(map, &vals[i])); + if (!hashmap_oom(map)) { + break; + } + } + } + + assert(map->count != 0); + size_t prev_cap = map->cap; + hashmap_clear(map, true); + assert(prev_cap < map->cap); + assert(map->count == 0); + + + for (int i = 0; i < N; i++) { + while (true) { + assert(!hashmap_set(map, &vals[i])); + if (!hashmap_oom(map)) { + break; + } + } + } + + prev_cap = map->cap; + hashmap_clear(map, false); + assert(prev_cap == map->cap); + + hashmap_free(map); + + xfree(vals); + + + while (!(map = hashmap_new(sizeof(char*), 0, seed, seed, + hash_str, compare_strs, free_str, NULL))); + + for (int i = 0; i < N; i++) { + char *str; + while (!(str = xmalloc(16))); + sprintf(str, "s%i", i); + while(!hashmap_set(map, &str)); + } + + hashmap_clear(map, false); + assert(hashmap_count(map) == 0); + + for (int i = 0; i < N; i++) { + char *str; + while (!(str = xmalloc(16))); + sprintf(str, "s%i", i); + while(!hashmap_set(map, &str)); + 
} + + hashmap_free(map); + + if (total_allocs != 0) { + fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs); + exit(1); + } +} + +#define bench(name, N, code) {{ \ + if (strlen(name) > 0) { \ + printf("%-14s ", name); \ + } \ + size_t tmem = total_mem; \ + size_t tallocs = total_allocs; \ + uint64_t bytes = 0; \ + clock_t begin = clock(); \ + for (int i = 0; i < N; i++) { \ + (code); \ + } \ + clock_t end = clock(); \ + double elapsed_secs = (double)(end - begin) / CLOCKS_PER_SEC; \ + double bytes_sec = (double)bytes/elapsed_secs; \ + printf("%d ops in %.3f secs, %.0f ns/op, %.0f op/sec", \ + N, elapsed_secs, \ + elapsed_secs/(double)N*1e9, \ + (double)N/elapsed_secs \ + ); \ + if (bytes > 0) { \ + printf(", %.1f GB/sec", bytes_sec/1024/1024/1024); \ + } \ + if (total_mem > tmem) { \ + size_t used_mem = total_mem-tmem; \ + printf(", %.2f bytes/op", (double)used_mem/N); \ + } \ + if (total_allocs > tallocs) { \ + size_t used_allocs = total_allocs-tallocs; \ + printf(", %.2f allocs/op", (double)used_allocs/N); \ + } \ + printf("\n"); \ +}} + +static void benchmarks() { + int seed = getenv("SEED")?atoi(getenv("SEED")):time(NULL); + int N = getenv("N")?atoi(getenv("N")):5000000; + printf("seed=%d, count=%d, item_size=%zu\n", seed, N, sizeof(int)); + srand(seed); + + + int *vals = xmalloc(N * sizeof(int)); + for (int i = 0; i < N; i++) { + vals[i] = i; + } + + shuffle(vals, N, sizeof(int)); + + struct hashmap *map; + shuffle(vals, N, sizeof(int)); + + map = hashmap_new(sizeof(int), 0, seed, seed, hash_int, compare_ints_udata, + NULL, NULL); + bench("set", N, { + int *v = hashmap_set(map, &vals[i]); + assert(!v); + }) + shuffle(vals, N, sizeof(int)); + bench("get", N, { + int *v = hashmap_get(map, &vals[i]); + assert(v && *v == vals[i]); + }) + shuffle(vals, N, sizeof(int)); + bench("delete", N, { + int *v = hashmap_delete(map, &vals[i]); + assert(v && *v == vals[i]); + }) + hashmap_free(map); + + map = hashmap_new(sizeof(int), N, seed, seed, hash_int, 
compare_ints_udata, + NULL, NULL); + bench("set (cap)", N, { + int *v = hashmap_set(map, &vals[i]); + assert(!v); + }) + shuffle(vals, N, sizeof(int)); + bench("get (cap)", N, { + int *v = hashmap_get(map, &vals[i]); + assert(v && *v == vals[i]); + }) + shuffle(vals, N, sizeof(int)); + bench("delete (cap)" , N, { + int *v = hashmap_delete(map, &vals[i]); + assert(v && *v == vals[i]); + }) + + hashmap_free(map); + + + xfree(vals); + + if (total_allocs != 0) { + fprintf(stderr, "total_allocs: expected 0, got %lu\n", total_allocs); + exit(1); + } +} + +int main() { + hashmap_set_allocator(xmalloc, xfree); + + if (getenv("BENCH")) { + printf("Running hashmap.c benchmarks...\n"); + benchmarks(); + } else { + printf("Running hashmap.c tests...\n"); + all(); + printf("PASSED\n"); + } +} + + +#endif + + + diff --git a/fastbril/src/libs/hashmap.h b/fastbril/src/libs/hashmap.h new file mode 100644 index 000000000..d5cb64ade --- /dev/null +++ b/fastbril/src/libs/hashmap.h @@ -0,0 +1,54 @@ +// Copyright 2020 Joshua J Baker. All rights reserved. +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file. 
+ +#ifndef HASHMAP_H +#define HASHMAP_H + +#include +#include +#include + +struct hashmap; + +struct hashmap *hashmap_new(size_t elsize, size_t cap, + uint64_t seed0, uint64_t seed1, + uint64_t (*hash)(const void *item, + uint64_t seed0, uint64_t seed1), + int (*compare)(const void *a, const void *b, + void *udata), + void (*elfree)(void *item), + void *udata); +struct hashmap *hashmap_new_with_allocator( + void *(*malloc)(size_t), + void *(*realloc)(void *, size_t), + void (*free)(void*), + size_t elsize, size_t cap, + uint64_t seed0, uint64_t seed1, + uint64_t (*hash)(const void *item, + uint64_t seed0, uint64_t seed1), + int (*compare)(const void *a, const void *b, + void *udata), + void (*elfree)(void *item), + void *udata); +void hashmap_free(struct hashmap *map); +void hashmap_clear(struct hashmap *map, bool update_cap); +size_t hashmap_count(struct hashmap *map); +bool hashmap_oom(struct hashmap *map); +void *hashmap_get(struct hashmap *map, const void *item); +void *hashmap_set(struct hashmap *map, void *item); +void *hashmap_delete(struct hashmap *map, void *item); +void *hashmap_probe(struct hashmap *map, uint64_t position); +bool hashmap_scan(struct hashmap *map, + bool (*iter)(const void *item, void *udata), void *udata); + +uint64_t hashmap_sip(const void *data, size_t len, + uint64_t seed0, uint64_t seed1); +uint64_t hashmap_murmur(const void *data, size_t len, + uint64_t seed0, uint64_t seed1); + + +// DEPRECATED: use `hashmap_new_with_allocator` +void hashmap_set_allocator(void *(*malloc)(size_t), void (*free)(void*)); + +#endif diff --git a/fastbril/src/libs/json.h b/fastbril/src/libs/json.h new file mode 100644 index 000000000..a62b842f6 --- /dev/null +++ b/fastbril/src/libs/json.h @@ -0,0 +1,3403 @@ +/* + The latest version of this library is available on GitHub; + https://github.com/sheredom/json.h. +*/ + +/* + This is free and unencumbered software released into the public domain. 
+ + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to . +*/ + +#ifndef SHEREDOM_JSON_H_INCLUDED +#define SHEREDOM_JSON_H_INCLUDED + +#if defined(_MSC_VER) +#pragma warning(push) + +/* disable warning: no function prototype given: converting '()' to '(void)' */ +#pragma warning(disable : 4255) + +/* disable warning: '__cplusplus' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' */ +#pragma warning(disable : 4668) + +/* disable warning: 'bytes padding added after construct' */ +#pragma warning(disable : 4820) +#endif + +#include +#include + +#if defined(_MSC_VER) +#define json_weak __inline +#elif defined(__clang__) || defined(__GNUC__) +#define json_weak __attribute__((weak)) +#else +#error Non clang, non gcc, non MSVC compiler found! 
+#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct json_value_s; +struct json_parse_result_s; + +enum json_parse_flags_e { + json_parse_flags_default = 0, + + /* allow trailing commas in objects and arrays. For example, both [true,] and + {"a" : null,} would be allowed with this option on. */ + json_parse_flags_allow_trailing_comma = 0x1, + + /* allow unquoted keys for objects. For example, {a : null} would be allowed + with this option on. */ + json_parse_flags_allow_unquoted_keys = 0x2, + + /* allow a global unbracketed object. For example, a : null, b : true, c : {} + would be allowed with this option on. */ + json_parse_flags_allow_global_object = 0x4, + + /* allow objects to use '=' instead of ':' between key/value pairs. For + example, a = null, b : true would be allowed with this option on. */ + json_parse_flags_allow_equals_in_object = 0x8, + + /* allow that objects don't have to have comma separators between key/value + pairs. */ + json_parse_flags_allow_no_commas = 0x10, + + /* allow c-style comments (either variants) to be ignored in the input JSON + file. */ + json_parse_flags_allow_c_style_comments = 0x20, + + /* deprecated flag, unused. */ + json_parse_flags_deprecated = 0x40, + + /* record location information for each value. */ + json_parse_flags_allow_location_information = 0x80, + + /* allow strings to be 'single quoted'. */ + json_parse_flags_allow_single_quoted_strings = 0x100, + + /* allow numbers to be hexadecimal. */ + json_parse_flags_allow_hexadecimal_numbers = 0x200, + + /* allow numbers like +123 to be parsed. */ + json_parse_flags_allow_leading_plus_sign = 0x400, + + /* allow numbers like .0123 or 123. to be parsed. */ + json_parse_flags_allow_leading_or_trailing_decimal_point = 0x800, + + /* allow Infinity, -Infinity, NaN, -NaN. */ + json_parse_flags_allow_inf_and_nan = 0x1000, + + /* allow multi line string values. */ + json_parse_flags_allow_multi_line_strings = 0x2000, + + /* allow simplified JSON to be parsed. 
Simplified JSON is an enabling of a set + of other parsing options. */ + json_parse_flags_allow_simplified_json = + (json_parse_flags_allow_trailing_comma | + json_parse_flags_allow_unquoted_keys | + json_parse_flags_allow_global_object | + json_parse_flags_allow_equals_in_object | + json_parse_flags_allow_no_commas), + + /* allow JSON5 to be parsed. JSON5 is an enabling of a set of other parsing + options. */ + json_parse_flags_allow_json5 = + (json_parse_flags_allow_trailing_comma | + json_parse_flags_allow_unquoted_keys | + json_parse_flags_allow_c_style_comments | + json_parse_flags_allow_single_quoted_strings | + json_parse_flags_allow_hexadecimal_numbers | + json_parse_flags_allow_leading_plus_sign | + json_parse_flags_allow_leading_or_trailing_decimal_point | + json_parse_flags_allow_inf_and_nan | + json_parse_flags_allow_multi_line_strings) +}; + +/* Parse a JSON text file, returning a pointer to the root of the JSON + * structure. json_parse performs 1 call to malloc for the entire encoding. + * Returns 0 if an error occurred (malformed JSON input, or malloc failed). */ +json_weak struct json_value_s *json_parse(const void *src, size_t src_size); + +/* Parse a JSON text file, returning a pointer to the root of the JSON + * structure. json_parse performs 1 call to alloc_func_ptr for the entire + * encoding. Returns 0 if an error occurred (malformed JSON input, or malloc + * failed). If an error occurred, the result struct (if not NULL) will explain + * the type of error, and the location in the input it occurred. If + * alloc_func_ptr is null then malloc is used. */ +json_weak struct json_value_s * +json_parse_ex(const void *src, size_t src_size, size_t flags_bitset, + void *(*alloc_func_ptr)(void *, size_t), void *user_data, + struct json_parse_result_s *result); + +/* Extracts a value and all the data that makes it up into a newly created + * value. json_extract_value performs 1 call to malloc for the entire encoding. 
+ */ +json_weak struct json_value_s * +json_extract_value(const struct json_value_s *value); + +/* Extracts a value and all the data that makes it up into a newly created + * value. json_extract_value performs 1 call to alloc_func_ptr for the entire + * encoding. If alloc_func_ptr is null then malloc is used. */ +json_weak struct json_value_s * +json_extract_value_ex(const struct json_value_s *value, + void *(*alloc_func_ptr)(void *, size_t), void *user_data); + +/* Write out a minified JSON utf-8 string. This string is an encoding of the + * minimal string characters required to still encode the same data. + * json_write_minified performs 1 call to malloc for the entire encoding. Return + * 0 if an error occurred (malformed JSON input, or malloc failed). The out_size + * parameter is optional as the utf-8 string is null terminated. */ +json_weak void *json_write_minified(const struct json_value_s *value, + size_t *out_size); + +/* Write out a pretty JSON utf-8 string. This string is encoded such that the + * resultant JSON is pretty in that it is easily human readable. The indent and + * newline parameters allow a user to specify what kind of indentation and + * newline they want (two spaces / three spaces / tabs? \r, \n, \r\n ?). Both + * indent and newline can be NULL, indent defaults to two spaces (" "), and + * newline defaults to linux newlines ('\n' as the newline character). + * json_write_pretty performs 1 call to malloc for the entire encoding. Return 0 + * if an error occurred (malformed JSON input, or malloc failed). The out_size + * parameter is optional as the utf-8 string is null terminated. */ +json_weak void *json_write_pretty(const struct json_value_s *value, + const char *indent, const char *newline, + size_t *out_size); + +/* Reinterpret a JSON value as a string. Returns null is the value was not a + * string. */ +json_weak struct json_string_s * +json_value_as_string(struct json_value_s *const value); + +/* Reinterpret a JSON value as a number. 
Returns null is the value was not a + * number. */ +json_weak struct json_number_s * +json_value_as_number(struct json_value_s *const value); + +/* Reinterpret a JSON value as an object. Returns null is the value was not an + * object. */ +json_weak struct json_object_s * +json_value_as_object(struct json_value_s *const value); + +/* Reinterpret a JSON value as an array. Returns null is the value was not an + * array. */ +json_weak struct json_array_s * +json_value_as_array(struct json_value_s *const value); + +/* Whether the value is true. */ +json_weak int json_value_is_true(const struct json_value_s *const value); + +/* Whether the value is false. */ +json_weak int json_value_is_false(const struct json_value_s *const value); + +/* Whether the value is null. */ +json_weak int json_value_is_null(const struct json_value_s *const value); + +/* The various types JSON values can be. Used to identify what a value is. */ +enum json_type_e { + json_type_string, + json_type_number, + json_type_object, + json_type_array, + json_type_true, + json_type_false, + json_type_null +}; + +/* A JSON string value. */ +struct json_string_s { + /* utf-8 string */ + const char *string; + /* The size (in bytes) of the string */ + size_t string_size; +}; + +/* A JSON string value (extended). */ +struct json_string_ex_s { + /* The JSON string this extends. */ + struct json_string_s string; + + /* The character offset for the value in the JSON input. */ + size_t offset; + + /* The line number for the value in the JSON input. */ + size_t line_no; + + /* The row number for the value in the JSON input, in bytes. */ + size_t row_no; +}; + +/* A JSON number value. */ +struct json_number_s { + /* ASCII string containing representation of the number. */ + const char *number; + /* the size (in bytes) of the number. */ + size_t number_size; +}; + +/* an element of a JSON object. */ +struct json_object_element_s { + /* the name of this element. 
*/ + struct json_string_s *name; + /* the value of this element. */ + struct json_value_s *value; + /* the next object element (can be NULL if the last element in the object). */ + struct json_object_element_s *next; +}; + +/* a JSON object value. */ +struct json_object_s { + /* a linked list of the elements in the object. */ + struct json_object_element_s *start; + /* the number of elements in the object. */ + size_t length; +}; + +/* an element of a JSON array. */ +struct json_array_element_s { + /* the value of this element. */ + struct json_value_s *value; + /* the next array element (can be NULL if the last element in the array). */ + struct json_array_element_s *next; +}; + +/* a JSON array value. */ +struct json_array_s { + /* a linked list of the elements in the array. */ + struct json_array_element_s *start; + /* the number of elements in the array. */ + size_t length; +}; + +/* a JSON value. */ +struct json_value_s { + /* a pointer to either a json_string_s, json_number_s, json_object_s, or. */ + /* json_array_s. Should be cast to the appropriate struct type based on what. + */ + /* the type of this value is. */ + void *payload; + /* must be one of json_type_e. If type is json_type_true, json_type_false, or. + */ + /* json_type_null, payload will be NULL. */ + size_t type; +}; + +/* a JSON value (extended). */ +struct json_value_ex_s { + /* the JSON value this extends. */ + struct json_value_s value; + + /* the character offset for the value in the JSON input. */ + size_t offset; + + /* the line number for the value in the JSON input. */ + size_t line_no; + + /* the row number for the value in the JSON input, in bytes. */ + size_t row_no; +}; + +/* a parsing error code. */ +enum json_parse_error_e { + /* no error occurred (huzzah!). */ + json_parse_error_none = 0, + + /* expected either a comma or a closing '}' or ']' to close an object or. */ + /* array! 
*/ + json_parse_error_expected_comma_or_closing_bracket, + + /* colon separating name/value pair was missing! */ + json_parse_error_expected_colon, + + /* expected string to begin with '"'! */ + json_parse_error_expected_opening_quote, + + /* invalid escaped sequence in string! */ + json_parse_error_invalid_string_escape_sequence, + + /* invalid number format! */ + json_parse_error_invalid_number_format, + + /* invalid value! */ + json_parse_error_invalid_value, + + /* reached end of buffer before object/array was complete! */ + json_parse_error_premature_end_of_buffer, + + /* string was malformed! */ + json_parse_error_invalid_string, + + /* a call to malloc, or a user provider allocator, failed. */ + json_parse_error_allocator_failed, + + /* the JSON input had unexpected trailing characters that weren't part of the. + */ + /* JSON value. */ + json_parse_error_unexpected_trailing_characters, + + /* catch-all error for everything else that exploded (real bad chi!). */ + json_parse_error_unknown +}; + +/* error report from json_parse_ex(). */ +struct json_parse_result_s { + /* the error code (one of json_parse_error_e). */ + size_t error; + + /* the character offset for the error in the JSON input. */ + size_t error_offset; + + /* the line number for the error in the JSON input. */ + size_t error_line_no; + + /* the row number for the error, in bytes. */ + size_t error_row_no; +}; + +#ifdef __cplusplus +} /* extern "C". 
*/ +#endif + +#include + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#define json_uintmax_t unsigned __int64 +#else +#include +#define json_uintmax_t uintmax_t +#endif + +#if defined(_MSC_VER) +#define json_strtoumax _strtoui64 +#else +#define json_strtoumax strtoumax +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define json_null nullptr +#else +#define json_null 0 +#endif + +#if defined(__clang__) +#pragma clang diagnostic push + +/* we do one big allocation via malloc, then cast aligned slices of this for. */ +/* our structures - we don't have a way to tell the compiler we know what we. */ +/* are doing, so disable the warning instead! */ +#pragma clang diagnostic ignored "-Wcast-align" + +/* We use C style casts everywhere. */ +#pragma clang diagnostic ignored "-Wold-style-cast" + +/* We need long long for strtoull. */ +#pragma clang diagnostic ignored "-Wc++11-long-long" + +/* Who cares if nullptr doesn't work with C++98, we don't use it there! */ +#pragma clang diagnostic ignored "-Wc++98-compat" +#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#elif defined(_MSC_VER) +#pragma warning(push) + +/* disable 'function selected for inline expansion' warning. */ +#pragma warning(disable : 4711) + +/* disable '#pragma warning: there is no warning number' warning. */ +#pragma warning(disable : 4619) + +/* disable 'warning number not a valid compiler warning' warning. */ +#pragma warning(disable : 4616) + +/* disable 'Compiler will insert Spectre mitigation for memory load if + * /Qspectre. */ +/* switch specified' warning. */ +#pragma warning(disable : 5045) +#endif + +struct json_parse_state_s { + const char *src; + size_t size; + size_t offset; + size_t flags_bitset; + char *data; + char *dom; + size_t dom_size; + size_t data_size; + size_t line_no; /* line counter for error reporting. */ + size_t line_offset; /* (offset-line_offset) is the character number (in + bytes). 
*/ + size_t error; +}; + +json_weak int json_hexadecimal_digit(const char c); +int json_hexadecimal_digit(const char c) { + if ('0' <= c && c <= '9') { + return c - '0'; + } + if ('a' <= c && c <= 'f') { + return c - 'a' + 10; + } + if ('A' <= c && c <= 'F') { + return c - 'A' + 10; + } + return -1; +} + +json_weak int json_hexadecimal_value(const char *c, const unsigned long size, + unsigned long *result); +int json_hexadecimal_value(const char *c, const unsigned long size, + unsigned long *result) { + const char *p; + int digit; + + if (size > sizeof(unsigned long) * 2) { + return 0; + } + + *result = 0; + for (p = c; (unsigned long)(p - c) < size; ++p) { + *result <<= 4; + digit = json_hexadecimal_digit(*p); + if (digit < 0 || digit > 15) { + return 0; + } + *result |= (unsigned char)digit; + } + return 1; +} + +json_weak int json_skip_whitespace(struct json_parse_state_s *state); +int json_skip_whitespace(struct json_parse_state_s *state) { + size_t offset = state->offset; + const size_t size = state->size; + const char *const src = state->src; + + /* the only valid whitespace according to ECMA-404 is ' ', '\n', '\r' and + * '\t'. */ + switch (src[offset]) { + default: + return 0; + case ' ': + case '\r': + case '\t': + case '\n': + break; + } + + do { + switch (src[offset]) { + default: + /* Update offset. */ + state->offset = offset; + return 1; + case ' ': + case '\r': + case '\t': + break; + case '\n': + state->line_no++; + state->line_offset = offset; + break; + } + + offset++; + } while (offset < size); + + /* Update offset. */ + state->offset = offset; + return 1; +} + +json_weak int json_skip_c_style_comments(struct json_parse_state_s *state); +int json_skip_c_style_comments(struct json_parse_state_s *state) { + /* do we have a comment?. */ + if ('/' == state->src[state->offset]) { + /* skip '/'. */ + state->offset++; + + if ('/' == state->src[state->offset]) { + /* we had a comment of the form //. */ + + /* skip second '/'. 
*/ + state->offset++; + + while (state->offset < state->size) { + switch (state->src[state->offset]) { + default: + /* skip the character in the comment. */ + state->offset++; + break; + case '\n': + /* if we have a newline, our comment has ended! Skip the newline. */ + state->offset++; + + /* we entered a newline, so move our line info forward. */ + state->line_no++; + state->line_offset = state->offset; + return 1; + } + } + + /* we reached the end of the JSON file! */ + return 1; + } else if ('*' == state->src[state->offset]) { + /* we had a comment in the C-style long form. */ + + /* skip '*'. */ + state->offset++; + + while (state->offset + 1 < state->size) { + if (('*' == state->src[state->offset]) && + ('/' == state->src[state->offset + 1])) { + /* we reached the end of our comment! */ + state->offset += 2; + return 1; + } else if ('\n' == state->src[state->offset]) { + /* we entered a newline, so move our line info forward. */ + state->line_no++; + state->line_offset = state->offset; + } + + /* skip character within comment. */ + state->offset++; + } + + /* Comment wasn't ended correctly which is a failure. */ + return 1; + } + } + + /* we didn't have any comment, which is ok too! */ + return 0; +} + +json_weak int json_skip_all_skippables(struct json_parse_state_s *state); +int json_skip_all_skippables(struct json_parse_state_s *state) { + /* skip all whitespace and other skippables until there are none left. note + * that the previous version suffered from read past errors should. the + * stream end on json_skip_c_style_comments eg. '{"a" ' with comments flag. + */ + + int did_consume = 0; + const size_t size = state->size; + + if (json_parse_flags_allow_c_style_comments & state->flags_bitset) { + do { + if (state->offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + did_consume = json_skip_whitespace(state); + + /* This should really be checked on access, not in front of every call. 
+ */ + if (state->offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + did_consume |= json_skip_c_style_comments(state); + } while (0 != did_consume); + } else { + do { + if (state->offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + did_consume = json_skip_whitespace(state); + } while (0 != did_consume); + } + + if (state->offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + return 0; +} + +json_weak int json_get_value_size(struct json_parse_state_s *state, + int is_global_object); + +json_weak int json_get_string_size(struct json_parse_state_s *state, + size_t is_key); +int json_get_string_size(struct json_parse_state_s *state, size_t is_key) { + size_t offset = state->offset; + const size_t size = state->size; + size_t data_size = 0; + const char *const src = state->src; + const int is_single_quote = '\'' == src[offset]; + const char quote_to_use = is_single_quote ? '\'' : '"'; + const size_t flags_bitset = state->flags_bitset; + unsigned long codepoint; + unsigned long high_surrogate = 0; + + if ((json_parse_flags_allow_location_information & flags_bitset) != 0 && + is_key != 0) { + state->dom_size += sizeof(struct json_string_ex_s); + } else { + state->dom_size += sizeof(struct json_string_s); + } + + if ('"' != src[offset]) { + /* if we are allowed single quoted strings check for that too. */ + if (!((json_parse_flags_allow_single_quoted_strings & flags_bitset) && + is_single_quote)) { + state->error = json_parse_error_expected_opening_quote; + state->offset = offset; + return 1; + } + } + + /* skip leading '"' or '\''. */ + offset++; + + while ((offset < size) && (quote_to_use != src[offset])) { + /* add space for the character. 
*/ + data_size++; + + switch (src[offset]) { + default: + break; + case '\0': + case '\t': + state->error = json_parse_error_invalid_string; + state->offset = offset; + return 1; + } + + if ('\\' == src[offset]) { + /* skip reverse solidus character. */ + offset++; + + if (offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + state->offset = offset; + return 1; + } + + switch (src[offset]) { + default: + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + case '"': + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + /* all valid characters! */ + offset++; + break; + case 'u': + if (!(offset + 5 < size)) { + /* invalid escaped unicode sequence! */ + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } + + codepoint = 0; + if (!json_hexadecimal_value(&src[offset + 1], 4, &codepoint)) { + /* escaped unicode sequences must contain 4 hexadecimal digits! */ + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } + + /* Valid sequence! + * see: https://en.wikipedia.org/wiki/UTF-8#Invalid_code_points. + * 1 7 U + 0000 U + 007F 0xxxxxxx. + * 2 11 U + 0080 U + 07FF 110xxxxx + * 10xxxxxx. + * 3 16 U + 0800 U + FFFF 1110xxxx + * 10xxxxxx 10xxxxxx. + * 4 21 U + 10000 U + 10FFFF 11110xxx + * 10xxxxxx 10xxxxxx 10xxxxxx. + * Note: the high and low surrogate halves used by UTF-16 (U+D800 + * through U+DFFF) and code points not encodable by UTF-16 (those after + * U+10FFFF) are not legal Unicode values, and their UTF-8 encoding must + * be treated as an invalid byte sequence. */ + + if (high_surrogate != 0) { + /* we previously read the high half of the \uxxxx\uxxxx pair, so now + * we expect the low half. */ + if (codepoint >= 0xdc00 && + codepoint <= 0xdfff) { /* low surrogate range. 
*/ + data_size += 3; + high_surrogate = 0; + } else { + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } + } else if (codepoint <= 0x7f) { + data_size += 0; + } else if (codepoint <= 0x7ff) { + data_size += 1; + } else if (codepoint >= 0xd800 && + codepoint <= 0xdbff) { /* high surrogate range. */ + /* The codepoint is the first half of a "utf-16 surrogate pair". so we + * need the other half for it to be valid: \uHHHH\uLLLL. */ + if (offset + 11 > size || '\\' != src[offset + 5] || + 'u' != src[offset + 6]) { + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } + high_surrogate = codepoint; + } else if (codepoint >= 0xd800 && + codepoint <= 0xdfff) { /* low surrogate range. */ + /* we did not read the other half before. */ + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } else { + data_size += 2; + } + /* escaped codepoints after 0xffff are supported in json through utf-16 + * surrogate pairs: \uD83D\uDD25 for U+1F525. */ + + offset += 5; + break; + } + } else if (('\r' == src[offset]) || ('\n' == src[offset])) { + if (!(json_parse_flags_allow_multi_line_strings & flags_bitset)) { + /* invalid escaped unicode sequence! */ + state->error = json_parse_error_invalid_string_escape_sequence; + state->offset = offset; + return 1; + } + + offset++; + } else { + /* skip character (valid part of sequence). */ + offset++; + } + } + + /* If the offset is equal to the size, we had a non-terminated string! */ + if (offset == size) { + state->error = json_parse_error_premature_end_of_buffer; + state->offset = offset - 1; + return 1; + } + + /* skip trailing '"' or '\''. */ + offset++; + + /* add enough space to store the string. */ + state->data_size += data_size; + + /* one more byte for null terminator ending the string! */ + state->data_size++; + + /* update offset. 
*/ + state->offset = offset; + + return 0; +} + +json_weak int is_valid_unquoted_key_char(const char c); +int is_valid_unquoted_key_char(const char c) { + return (('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || ('_' == c)); +} + +json_weak int json_get_key_size(struct json_parse_state_s *state); +int json_get_key_size(struct json_parse_state_s *state) { + const size_t flags_bitset = state->flags_bitset; + + if (json_parse_flags_allow_unquoted_keys & flags_bitset) { + size_t offset = state->offset; + const size_t size = state->size; + const char *const src = state->src; + size_t data_size = state->data_size; + + /* if we are allowing unquoted keys, first grok for a quote... */ + if ('"' == src[offset]) { + /* ... if we got a comma, just parse the key as a string as normal. */ + return json_get_string_size(state, 1); + } else if ((json_parse_flags_allow_single_quoted_strings & flags_bitset) && + ('\'' == src[offset])) { + /* ... if we got a comma, just parse the key as a string as normal. */ + return json_get_string_size(state, 1); + } else { + while ((offset < size) && is_valid_unquoted_key_char(src[offset])) { + offset++; + data_size++; + } + + /* one more byte for null terminator ending the string! */ + data_size++; + + if (json_parse_flags_allow_location_information & flags_bitset) { + state->dom_size += sizeof(struct json_string_ex_s); + } else { + state->dom_size += sizeof(struct json_string_s); + } + + /* update offset. */ + state->offset = offset; + + /* update data_size. */ + state->data_size = data_size; + + return 0; + } + } else { + /* we are only allowed to have quoted keys, so just parse a string! 
*/ + return json_get_string_size(state, 1); + } +} + +json_weak int json_get_object_size(struct json_parse_state_s *state, + int is_global_object); +int json_get_object_size(struct json_parse_state_s *state, + int is_global_object) { + const size_t flags_bitset = state->flags_bitset; + const char *const src = state->src; + const size_t size = state->size; + size_t elements = 0; + int allow_comma = 0; + int found_closing_brace = 0; + + if (is_global_object) { + /* if we found an opening '{' of an object, we actually have a normal JSON + * object at the root of the DOM... */ + if (!json_skip_all_skippables(state) && '{' == state->src[state->offset]) { + /* . and we don't actually have a global object after all! */ + is_global_object = 0; + } + } + + if (!is_global_object) { + if ('{' != src[state->offset]) { + state->error = json_parse_error_unknown; + return 1; + } + + /* skip leading '{'. */ + state->offset++; + } + + state->dom_size += sizeof(struct json_object_s); + + if ((state->offset == size) && !is_global_object) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + do { + if (!is_global_object) { + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + if ('}' == src[state->offset]) { + /* skip trailing '}'. */ + state->offset++; + + found_closing_brace = 1; + + /* finished the object! */ + break; + } + } else { + /* we don't require brackets, so that means the object ends when the input + * stream ends! */ + if (json_skip_all_skippables(state)) { + break; + } + } + + /* if we parsed at least once element previously, grok for a comma. */ + if (allow_comma) { + if (',' == src[state->offset]) { + /* skip comma. */ + state->offset++; + allow_comma = 0; + } else if (json_parse_flags_allow_no_commas & flags_bitset) { + /* we don't require a comma, and we didn't find one, which is ok! 
*/ + allow_comma = 0; + } else { + /* otherwise we are required to have a comma, and we found none. */ + state->error = json_parse_error_expected_comma_or_closing_bracket; + return 1; + } + + if (json_parse_flags_allow_trailing_comma & flags_bitset) { + continue; + } else { + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + } + } + + if (json_get_key_size(state)) { + /* key parsing failed! */ + state->error = json_parse_error_invalid_string; + return 1; + } + + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + if (json_parse_flags_allow_equals_in_object & flags_bitset) { + const char current = src[state->offset]; + if ((':' != current) && ('=' != current)) { + state->error = json_parse_error_expected_colon; + return 1; + } + } else { + if (':' != src[state->offset]) { + state->error = json_parse_error_expected_colon; + return 1; + } + } + + /* skip colon. */ + state->offset++; + + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + if (json_get_value_size(state, /* is_global_object = */ 0)) { + /* value parsing failed! */ + return 1; + } + + /* successfully parsed a name/value pair! 
*/ + elements++; + allow_comma = 1; + } while (state->offset < size); + + if ((state->offset == size) && !is_global_object && !found_closing_brace) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + state->dom_size += sizeof(struct json_object_element_s) * elements; + + return 0; +} + +json_weak int json_get_array_size(struct json_parse_state_s *state); +int json_get_array_size(struct json_parse_state_s *state) { + const size_t flags_bitset = state->flags_bitset; + size_t elements = 0; + int allow_comma = 0; + const char *const src = state->src; + const size_t size = state->size; + + if ('[' != src[state->offset]) { + /* expected array to begin with leading '['. */ + state->error = json_parse_error_unknown; + return 1; + } + + /* skip leading '['. */ + state->offset++; + + state->dom_size += sizeof(struct json_array_s); + + while (state->offset < size) { + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + if (']' == src[state->offset]) { + /* skip trailing ']'. */ + state->offset++; + + state->dom_size += sizeof(struct json_array_element_s) * elements; + + /* finished the object! */ + return 0; + } + + /* if we parsed at least once element previously, grok for a comma. */ + if (allow_comma) { + if (',' == src[state->offset]) { + /* skip comma. */ + state->offset++; + allow_comma = 0; + } else if (!(json_parse_flags_allow_no_commas & flags_bitset)) { + state->error = json_parse_error_expected_comma_or_closing_bracket; + return 1; + } + + if (json_parse_flags_allow_trailing_comma & flags_bitset) { + allow_comma = 0; + continue; + } else { + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + } + } + + if (json_get_value_size(state, /* is_global_object = */ 0)) { + /* value parsing failed! */ + return 1; + } + + /* successfully parsed an array element! 
*/ + elements++; + allow_comma = 1; + } + + /* we consumed the entire input before finding the closing ']' of the array! + */ + state->error = json_parse_error_premature_end_of_buffer; + return 1; +} + +json_weak int json_get_number_size(struct json_parse_state_s *state); +int json_get_number_size(struct json_parse_state_s *state) { + const size_t flags_bitset = state->flags_bitset; + size_t offset = state->offset; + const size_t size = state->size; + int had_leading_digits = 0; + const char *const src = state->src; + + state->dom_size += sizeof(struct json_number_s); + + if ((json_parse_flags_allow_hexadecimal_numbers & flags_bitset) && + (offset + 1 < size) && ('0' == src[offset]) && + (('x' == src[offset + 1]) || ('X' == src[offset + 1]))) { + /* skip the leading 0x that identifies a hexadecimal number. */ + offset += 2; + + /* consume hexadecimal digits. */ + while ((offset < size) && (('0' <= src[offset] && src[offset] <= '9') || + ('a' <= src[offset] && src[offset] <= 'f') || + ('A' <= src[offset] && src[offset] <= 'F'))) { + offset++; + } + } else { + int found_sign = 0; + int inf_or_nan = 0; + + if ((offset < size) && + (('-' == src[offset]) || + ((json_parse_flags_allow_leading_plus_sign & flags_bitset) && + ('+' == src[offset])))) { + /* skip valid leading '-' or '+'. */ + offset++; + + found_sign = 1; + } + + if (json_parse_flags_allow_inf_and_nan & flags_bitset) { + const char inf[9] = "Infinity"; + const size_t inf_strlen = sizeof(inf) - 1; + const char nan[4] = "NaN"; + const size_t nan_strlen = sizeof(nan) - 1; + + if (offset + inf_strlen < size) { + int found = 1; + size_t i; + for (i = 0; i < inf_strlen; i++) { + if (inf[i] != src[offset + i]) { + found = 0; + break; + } + } + + if (found) { + /* We found our special 'Infinity' keyword! 
*/ + offset += inf_strlen; + + inf_or_nan = 1; + } + } + + if (offset + nan_strlen < size) { + int found = 1; + size_t i; + for (i = 0; i < nan_strlen; i++) { + if (nan[i] != src[offset + i]) { + found = 0; + break; + } + } + + if (found) { + /* We found our special 'NaN' keyword! */ + offset += nan_strlen; + + inf_or_nan = 1; + } + } + } + + if (found_sign && !inf_or_nan && (offset < size) && + !('0' <= src[offset] && src[offset] <= '9')) { + /* check if we are allowing leading '.'. */ + if (!(json_parse_flags_allow_leading_or_trailing_decimal_point & + flags_bitset) || + ('.' != src[offset])) { + /* a leading '-' must be immediately followed by any digit! */ + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + } + } + + if ((offset < size) && ('0' == src[offset])) { + /* skip valid '0'. */ + offset++; + + /* we need to record whether we had any leading digits for checks later. + */ + had_leading_digits = 1; + + if ((offset < size) && ('0' <= src[offset] && src[offset] <= '9')) { + /* a leading '0' must not be immediately followed by any digit! */ + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + } + } + + /* the main digits of our number next. */ + while ((offset < size) && ('0' <= src[offset] && src[offset] <= '9')) { + offset++; + + /* we need to record whether we had any leading digits for checks later. + */ + had_leading_digits = 1; + } + + if ((offset < size) && ('.' == src[offset])) { + offset++; + + if (!('0' <= src[offset] && src[offset] <= '9')) { + if (!(json_parse_flags_allow_leading_or_trailing_decimal_point & + flags_bitset) || + !had_leading_digits) { + /* a decimal point must be followed by at least one digit. */ + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + } + } + + /* a decimal point can be followed by more digits of course! 
*/ + while ((offset < size) && ('0' <= src[offset] && src[offset] <= '9')) { + offset++; + } + } + + if ((offset < size) && ('e' == src[offset] || 'E' == src[offset])) { + /* our number has an exponent! Skip 'e' or 'E'. */ + offset++; + + if ((offset < size) && ('-' == src[offset] || '+' == src[offset])) { + /* skip optional '-' or '+'. */ + offset++; + } + + if ((offset < size) && !('0' <= src[offset] && src[offset] <= '9')) { + /* an exponent must have at least one digit! */ + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + } + + /* consume exponent digits. */ + do { + offset++; + } while ((offset < size) && ('0' <= src[offset] && src[offset] <= '9')); + } + } + + if (offset < size) { + switch (src[offset]) { + case ' ': + case '\t': + case '\r': + case '\n': + case '}': + case ',': + case ']': + /* all of the above are ok. */ + break; + case '=': + if (json_parse_flags_allow_equals_in_object & flags_bitset) { + break; + } + + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + default: + state->error = json_parse_error_invalid_number_format; + state->offset = offset; + return 1; + } + } + + state->data_size += offset - state->offset; + + /* one more byte for null terminator ending the number string! */ + state->data_size++; + + /* update offset. 
*/ + state->offset = offset; + + return 0; +} + +json_weak int json_get_value_size(struct json_parse_state_s *state, + int is_global_object); +int json_get_value_size(struct json_parse_state_s *state, + int is_global_object) { + const size_t flags_bitset = state->flags_bitset; + const char *const src = state->src; + size_t offset; + const size_t size = state->size; + + if (json_parse_flags_allow_location_information & flags_bitset) { + state->dom_size += sizeof(struct json_value_ex_s); + } else { + state->dom_size += sizeof(struct json_value_s); + } + + if (is_global_object) { + return json_get_object_size(state, /* is_global_object = */ 1); + } else { + if (json_skip_all_skippables(state)) { + state->error = json_parse_error_premature_end_of_buffer; + return 1; + } + + /* can cache offset now. */ + offset = state->offset; + + switch (src[offset]) { + case '"': + return json_get_string_size(state, 0); + case '\'': + if (json_parse_flags_allow_single_quoted_strings & flags_bitset) { + return json_get_string_size(state, 0); + } else { + /* invalid value! */ + state->error = json_parse_error_invalid_value; + return 1; + } + case '{': + return json_get_object_size(state, /* is_global_object = */ 0); + case '[': + return json_get_array_size(state); + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return json_get_number_size(state); + case '+': + if (json_parse_flags_allow_leading_plus_sign & flags_bitset) { + return json_get_number_size(state); + } else { + /* invalid value! */ + state->error = json_parse_error_invalid_number_format; + return 1; + } + case '.': + if (json_parse_flags_allow_leading_or_trailing_decimal_point & + flags_bitset) { + return json_get_number_size(state); + } else { + /* invalid value! 
*/ + state->error = json_parse_error_invalid_number_format; + return 1; + } + default: + if ((offset + 4) <= size && 't' == src[offset + 0] && + 'r' == src[offset + 1] && 'u' == src[offset + 2] && + 'e' == src[offset + 3]) { + state->offset += 4; + return 0; + } else if ((offset + 5) <= size && 'f' == src[offset + 0] && + 'a' == src[offset + 1] && 'l' == src[offset + 2] && + 's' == src[offset + 3] && 'e' == src[offset + 4]) { + state->offset += 5; + return 0; + } else if ((offset + 4) <= size && 'n' == state->src[offset + 0] && + 'u' == state->src[offset + 1] && + 'l' == state->src[offset + 2] && + 'l' == state->src[offset + 3]) { + state->offset += 4; + return 0; + } else if ((json_parse_flags_allow_inf_and_nan & flags_bitset) && + (offset + 3) <= size && 'N' == src[offset + 0] && + 'a' == src[offset + 1] && 'N' == src[offset + 2]) { + return json_get_number_size(state); + } else if ((json_parse_flags_allow_inf_and_nan & flags_bitset) && + (offset + 8) <= size && 'I' == src[offset + 0] && + 'n' == src[offset + 1] && 'f' == src[offset + 2] && + 'i' == src[offset + 3] && 'n' == src[offset + 4] && + 'i' == src[offset + 5] && 't' == src[offset + 6] && + 'y' == src[offset + 7]) { + return json_get_number_size(state); + } + + /* invalid value! */ + state->error = json_parse_error_invalid_value; + return 1; + } + } +} + +json_weak void json_parse_value(struct json_parse_state_s *state, + int is_global_object, + struct json_value_s *value); + +json_weak void json_parse_string(struct json_parse_state_s *state, + struct json_string_s *string); +void json_parse_string(struct json_parse_state_s *state, + struct json_string_s *string) { + size_t offset = state->offset; + size_t bytes_written = 0; + const char *const src = state->src; + const char quote_to_use = '\'' == src[offset] ? '\'' : '"'; + char *data = state->data; + unsigned long high_surrogate = 0; + unsigned long codepoint; + + string->string = data; + + /* skip leading '"' or '\''. 
*/ + offset++; + + while (quote_to_use != src[offset]) { + if ('\\' == src[offset]) { + /* skip the reverse solidus. */ + offset++; + + switch (src[offset++]) { + default: + return; /* we cannot ever reach here. */ + case 'u': { + codepoint = 0; + if (!json_hexadecimal_value(&src[offset], 4, &codepoint)) { + return; /* this shouldn't happen as the value was already validated. + */ + } + + offset += 4; + + if (codepoint <= 0x7fu) { + data[bytes_written++] = (char)codepoint; /* 0xxxxxxx. */ + } else if (codepoint <= 0x7ffu) { + data[bytes_written++] = + (char)(0xc0u | (codepoint >> 6)); /* 110xxxxx. */ + data[bytes_written++] = + (char)(0x80u | (codepoint & 0x3fu)); /* 10xxxxxx. */ + } else if (codepoint >= 0xd800 && + codepoint <= 0xdbff) { /* high surrogate. */ + high_surrogate = codepoint; + continue; /* we need the low half to form a complete codepoint. */ + } else if (codepoint >= 0xdc00 && + codepoint <= 0xdfff) { /* low surrogate. */ + /* combine with the previously read half to obtain the complete + * codepoint. */ + const unsigned long surrogate_offset = + 0x10000u - (0xD800u << 10) - 0xDC00u; + codepoint = (high_surrogate << 10) + codepoint + surrogate_offset; + high_surrogate = 0; + data[bytes_written++] = + (char)(0xF0u | (codepoint >> 18)); /* 11110xxx. */ + data[bytes_written++] = + (char)(0x80u | ((codepoint >> 12) & 0x3fu)); /* 10xxxxxx. */ + data[bytes_written++] = + (char)(0x80u | ((codepoint >> 6) & 0x3fu)); /* 10xxxxxx. */ + data[bytes_written++] = + (char)(0x80u | (codepoint & 0x3fu)); /* 10xxxxxx. */ + } else { + /* we assume the value was validated and thus is within the valid + * range. */ + data[bytes_written++] = + (char)(0xe0u | (codepoint >> 12)); /* 1110xxxx. */ + data[bytes_written++] = + (char)(0x80u | ((codepoint >> 6) & 0x3fu)); /* 10xxxxxx. */ + data[bytes_written++] = + (char)(0x80u | (codepoint & 0x3fu)); /* 10xxxxxx. 
*/ + } + } break; + case '"': + data[bytes_written++] = '"'; + break; + case '\\': + data[bytes_written++] = '\\'; + break; + case '/': + data[bytes_written++] = '/'; + break; + case 'b': + data[bytes_written++] = '\b'; + break; + case 'f': + data[bytes_written++] = '\f'; + break; + case 'n': + data[bytes_written++] = '\n'; + break; + case 'r': + data[bytes_written++] = '\r'; + break; + case 't': + data[bytes_written++] = '\t'; + break; + case '\r': + data[bytes_written++] = '\r'; + + /* check if we have a "\r\n" sequence. */ + if ('\n' == src[offset]) { + data[bytes_written++] = '\n'; + offset++; + } + + break; + case '\n': + data[bytes_written++] = '\n'; + break; + } + } else { + /* copy the character. */ + data[bytes_written++] = src[offset++]; + } + } + + /* skip trailing '"' or '\''. */ + offset++; + + /* record the size of the string. */ + string->string_size = bytes_written; + + /* add null terminator to string. */ + data[bytes_written++] = '\0'; + + /* move data along. */ + state->data += bytes_written; + + /* update offset. */ + state->offset = offset; +} + +json_weak void json_parse_key(struct json_parse_state_s *state, + struct json_string_s *string); +void json_parse_key(struct json_parse_state_s *state, + struct json_string_s *string) { + if (json_parse_flags_allow_unquoted_keys & state->flags_bitset) { + const char *const src = state->src; + char *const data = state->data; + size_t offset = state->offset; + + /* if we are allowing unquoted keys, check for quoted anyway... */ + if (('"' == src[offset]) || ('\'' == src[offset])) { + /* ... if we got a quote, just parse the key as a string as normal. */ + json_parse_string(state, string); + } else { + size_t size = 0; + + string->string = state->data; + + while (is_valid_unquoted_key_char(src[offset])) { + data[size++] = src[offset++]; + } + + /* add null terminator to string. */ + data[size] = '\0'; + + /* record the size of the string. */ + string->string_size = size++; + + /* move data along. 
*/ + state->data += size; + + /* update offset. */ + state->offset = offset; + } + } else { + /* we are only allowed to have quoted keys, so just parse a string! */ + json_parse_string(state, string); + } +} + +json_weak void json_parse_object(struct json_parse_state_s *state, + int is_global_object, + struct json_object_s *object); +void json_parse_object(struct json_parse_state_s *state, int is_global_object, + struct json_object_s *object) { + const size_t flags_bitset = state->flags_bitset; + const size_t size = state->size; + const char *const src = state->src; + size_t elements = 0; + int allow_comma = 0; + struct json_object_element_s *previous = json_null; + + if (is_global_object) { + /* if we skipped some whitespace, and then found an opening '{' of an. */ + /* object, we actually have a normal JSON object at the root of the DOM... + */ + if ('{' == src[state->offset]) { + /* . and we don't actually have a global object after all! */ + is_global_object = 0; + } + } + + if (!is_global_object) { + /* skip leading '{'. */ + state->offset++; + } + + (void)json_skip_all_skippables(state); + + /* reset elements. */ + elements = 0; + + while (state->offset < size) { + struct json_object_element_s *element = json_null; + struct json_string_s *string = json_null; + struct json_value_s *value = json_null; + + if (!is_global_object) { + (void)json_skip_all_skippables(state); + + if ('}' == src[state->offset]) { + /* skip trailing '}'. */ + state->offset++; + + /* finished the object! */ + break; + } + } else { + if (json_skip_all_skippables(state)) { + /* global object ends when the file ends! */ + break; + } + } + + /* if we parsed at least one element previously, grok for a comma. */ + if (allow_comma) { + if (',' == src[state->offset]) { + /* skip comma. 
*/ + state->offset++; + allow_comma = 0; + continue; + } + } + + element = (struct json_object_element_s *)state->dom; + + state->dom += sizeof(struct json_object_element_s); + + if (json_null == previous) { + /* this is our first element, so record it in our object. */ + object->start = element; + } else { + previous->next = element; + } + + previous = element; + + if (json_parse_flags_allow_location_information & flags_bitset) { + struct json_string_ex_s *string_ex = + (struct json_string_ex_s *)state->dom; + state->dom += sizeof(struct json_string_ex_s); + + string_ex->offset = state->offset; + string_ex->line_no = state->line_no; + string_ex->row_no = state->offset - state->line_offset; + + string = &(string_ex->string); + } else { + string = (struct json_string_s *)state->dom; + state->dom += sizeof(struct json_string_s); + } + + element->name = string; + + (void)json_parse_key(state, string); + + (void)json_skip_all_skippables(state); + + /* skip colon or equals. */ + state->offset++; + + (void)json_skip_all_skippables(state); + + if (json_parse_flags_allow_location_information & flags_bitset) { + struct json_value_ex_s *value_ex = (struct json_value_ex_s *)state->dom; + state->dom += sizeof(struct json_value_ex_s); + + value_ex->offset = state->offset; + value_ex->line_no = state->line_no; + value_ex->row_no = state->offset - state->line_offset; + + value = &(value_ex->value); + } else { + value = (struct json_value_s *)state->dom; + state->dom += sizeof(struct json_value_s); + } + + element->value = value; + + json_parse_value(state, /* is_global_object = */ 0, value); + + /* successfully parsed a name/value pair! */ + elements++; + allow_comma = 1; + } + + /* if we had at least one element, end the linked list. 
*/ + if (previous) { + previous->next = json_null; + } + + if (0 == elements) { + object->start = json_null; + } + + object->length = elements; +} + +json_weak void json_parse_array(struct json_parse_state_s *state, + struct json_array_s *array); +void json_parse_array(struct json_parse_state_s *state, + struct json_array_s *array) { + const char *const src = state->src; + const size_t size = state->size; + size_t elements = 0; + int allow_comma = 0; + struct json_array_element_s *previous = json_null; + + /* skip leading '['. */ + state->offset++; + + (void)json_skip_all_skippables(state); + + /* reset elements. */ + elements = 0; + + do { + struct json_array_element_s *element = json_null; + struct json_value_s *value = json_null; + + (void)json_skip_all_skippables(state); + + if (']' == src[state->offset]) { + /* skip trailing ']'. */ + state->offset++; + + /* finished the array! */ + break; + } + + /* if we parsed at least one element previously, grok for a comma. */ + if (allow_comma) { + if (',' == src[state->offset]) { + /* skip comma. */ + state->offset++; + allow_comma = 0; + continue; + } + } + + element = (struct json_array_element_s *)state->dom; + + state->dom += sizeof(struct json_array_element_s); + + if (json_null == previous) { + /* this is our first element, so record it in our array. 
*/ + array->start = element; + } else { + previous->next = element; + } + + previous = element; + + if (json_parse_flags_allow_location_information & state->flags_bitset) { + struct json_value_ex_s *value_ex = (struct json_value_ex_s *)state->dom; + state->dom += sizeof(struct json_value_ex_s); + + value_ex->offset = state->offset; + value_ex->line_no = state->line_no; + value_ex->row_no = state->offset - state->line_offset; + + value = &(value_ex->value); + } else { + value = (struct json_value_s *)state->dom; + state->dom += sizeof(struct json_value_s); + } + + element->value = value; + + json_parse_value(state, /* is_global_object = */ 0, value); + + /* successfully parsed an array element! */ + elements++; + allow_comma = 1; + } while (state->offset < size); + + /* end the linked list. */ + if (previous) { + previous->next = json_null; + } + + if (0 == elements) { + array->start = json_null; + } + + array->length = elements; +} + +json_weak void json_parse_number(struct json_parse_state_s *state, + struct json_number_s *number); +void json_parse_number(struct json_parse_state_s *state, + struct json_number_s *number) { + const size_t flags_bitset = state->flags_bitset; + size_t offset = state->offset; + const size_t size = state->size; + size_t bytes_written = 0; + const char *const src = state->src; + char *data = state->data; + + number->number = data; + + if (json_parse_flags_allow_hexadecimal_numbers & flags_bitset) { + if (('0' == src[offset]) && + (('x' == src[offset + 1]) || ('X' == src[offset + 1]))) { + /* consume hexadecimal digits. 
*/ + while ((offset < size) && + (('0' <= src[offset] && src[offset] <= '9') || + ('a' <= src[offset] && src[offset] <= 'f') || + ('A' <= src[offset] && src[offset] <= 'F') || + ('x' == src[offset]) || ('X' == src[offset]))) { + data[bytes_written++] = src[offset++]; + } + } + } + + while (offset < size) { + int end = 0; + + switch (src[offset]) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + case 'e': + case 'E': + case '+': + case '-': + data[bytes_written++] = src[offset++]; + break; + default: + end = 1; + break; + } + + if (0 != end) { + break; + } + } + + if (json_parse_flags_allow_inf_and_nan & flags_bitset) { + const size_t inf_strlen = 8; /* = strlen("Infinity");. */ + const size_t nan_strlen = 3; /* = strlen("NaN");. */ + + if (offset + inf_strlen < size) { + if ('I' == src[offset]) { + size_t i; + /* We found our special 'Infinity' keyword! */ + for (i = 0; i < inf_strlen; i++) { + data[bytes_written++] = src[offset++]; + } + } + } + + if (offset + nan_strlen < size) { + if ('N' == src[offset]) { + size_t i; + /* We found our special 'NaN' keyword! */ + for (i = 0; i < nan_strlen; i++) { + data[bytes_written++] = src[offset++]; + } + } + } + } + + /* record the size of the number. */ + number->number_size = bytes_written; + /* add null terminator to number string. */ + data[bytes_written++] = '\0'; + /* move data along. */ + state->data += bytes_written; + /* update offset. */ + state->offset = offset; +} + +json_weak void json_parse_value(struct json_parse_state_s *state, + int is_global_object, + struct json_value_s *value); +void json_parse_value(struct json_parse_state_s *state, int is_global_object, + struct json_value_s *value) { + const size_t flags_bitset = state->flags_bitset; + const char *const src = state->src; + const size_t size = state->size; + size_t offset; + + (void)json_skip_all_skippables(state); + + /* cache offset now. 
*/ + offset = state->offset; + + if (is_global_object) { + value->type = json_type_object; + value->payload = state->dom; + state->dom += sizeof(struct json_object_s); + json_parse_object(state, /* is_global_object = */ 1, + (struct json_object_s *)value->payload); + } else { + switch (src[offset]) { + case '"': + case '\'': + value->type = json_type_string; + value->payload = state->dom; + state->dom += sizeof(struct json_string_s); + json_parse_string(state, (struct json_string_s *)value->payload); + break; + case '{': + value->type = json_type_object; + value->payload = state->dom; + state->dom += sizeof(struct json_object_s); + json_parse_object(state, /* is_global_object = */ 0, + (struct json_object_s *)value->payload); + break; + case '[': + value->type = json_type_array; + value->payload = state->dom; + state->dom += sizeof(struct json_array_s); + json_parse_array(state, (struct json_array_s *)value->payload); + break; + case '-': + case '+': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + value->type = json_type_number; + value->payload = state->dom; + state->dom += sizeof(struct json_number_s); + json_parse_number(state, (struct json_number_s *)value->payload); + break; + default: + if ((offset + 4) <= size && 't' == src[offset + 0] && + 'r' == src[offset + 1] && 'u' == src[offset + 2] && + 'e' == src[offset + 3]) { + value->type = json_type_true; + value->payload = json_null; + state->offset += 4; + } else if ((offset + 5) <= size && 'f' == src[offset + 0] && + 'a' == src[offset + 1] && 'l' == src[offset + 2] && + 's' == src[offset + 3] && 'e' == src[offset + 4]) { + value->type = json_type_false; + value->payload = json_null; + state->offset += 5; + } else if ((offset + 4) <= size && 'n' == src[offset + 0] && + 'u' == src[offset + 1] && 'l' == src[offset + 2] && + 'l' == src[offset + 3]) { + value->type = json_type_null; + value->payload = json_null; + state->offset += 
4; + } else if ((json_parse_flags_allow_inf_and_nan & flags_bitset) && + (offset + 3) <= size && 'N' == src[offset + 0] && + 'a' == src[offset + 1] && 'N' == src[offset + 2]) { + value->type = json_type_number; + value->payload = state->dom; + state->dom += sizeof(struct json_number_s); + json_parse_number(state, (struct json_number_s *)value->payload); + } else if ((json_parse_flags_allow_inf_and_nan & flags_bitset) && + (offset + 8) <= size && 'I' == src[offset + 0] && + 'n' == src[offset + 1] && 'f' == src[offset + 2] && + 'i' == src[offset + 3] && 'n' == src[offset + 4] && + 'i' == src[offset + 5] && 't' == src[offset + 6] && + 'y' == src[offset + 7]) { + value->type = json_type_number; + value->payload = state->dom; + state->dom += sizeof(struct json_number_s); + json_parse_number(state, (struct json_number_s *)value->payload); + } + break; + } + } +} + +struct json_value_s * +json_parse_ex(const void *src, size_t src_size, size_t flags_bitset, + void *(*alloc_func_ptr)(void *user_data, size_t size), + void *user_data, struct json_parse_result_s *result) { + struct json_parse_state_s state; + void *allocation; + struct json_value_s *value; + size_t total_size; + int input_error; + + if (result) { + result->error = json_parse_error_none; + result->error_offset = 0; + result->error_line_no = 0; + result->error_row_no = 0; + } + + if (json_null == src) { + /* invalid src pointer was null! 
*/ + return json_null; + } + + state.src = (const char *)src; + state.size = src_size; + state.offset = 0; + state.line_no = 1; + state.line_offset = 0; + state.error = json_parse_error_none; + state.dom_size = 0; + state.data_size = 0; + state.flags_bitset = flags_bitset; + + input_error = json_get_value_size( + &state, (int)(json_parse_flags_allow_global_object & state.flags_bitset)); + + if (0 == input_error) { + json_skip_all_skippables(&state); + + if (state.offset != state.size) { + /* our parsing didn't have an error, but there are characters remaining in + * the input that weren't part of the JSON! */ + + state.error = json_parse_error_unexpected_trailing_characters; + input_error = 1; + } + } + + if (input_error) { + /* parsing value's size failed (most likely an invalid JSON DOM!). */ + if (result) { + result->error = state.error; + result->error_offset = state.offset; + result->error_line_no = state.line_no; + result->error_row_no = state.offset - state.line_offset; + } + return json_null; + } + + /* our total allocation is the combination of the dom and data sizes (we. */ + /* first encode the structure of the JSON, and then the data referenced by. */ + /* the JSON values). */ + total_size = state.dom_size + state.data_size; + + if (json_null == alloc_func_ptr) { + allocation = malloc(total_size); + } else { + allocation = alloc_func_ptr(user_data, total_size); + } + + if (json_null == allocation) { + /* malloc failed! */ + if (result) { + result->error = json_parse_error_allocator_failed; + result->error_offset = 0; + result->error_line_no = 0; + result->error_row_no = 0; + } + + return json_null; + } + + /* reset offset so we can reuse it. */ + state.offset = 0; + + /* reset the line information so we can reuse it. 
*/ + state.line_no = 1; + state.line_offset = 0; + + state.dom = (char *)allocation; + state.data = state.dom + state.dom_size; + + if (json_parse_flags_allow_location_information & state.flags_bitset) { + struct json_value_ex_s *value_ex = (struct json_value_ex_s *)state.dom; + state.dom += sizeof(struct json_value_ex_s); + + value_ex->offset = state.offset; + value_ex->line_no = state.line_no; + value_ex->row_no = state.offset - state.line_offset; + + value = &(value_ex->value); + } else { + value = (struct json_value_s *)state.dom; + state.dom += sizeof(struct json_value_s); + } + + json_parse_value( + &state, (int)(json_parse_flags_allow_global_object & state.flags_bitset), + value); + + return (struct json_value_s *)allocation; +} + +struct json_value_s *json_parse(const void *src, size_t src_size) { + return json_parse_ex(src, src_size, json_parse_flags_default, json_null, + json_null, json_null); +} + +struct json_extract_result_s { + size_t dom_size; + size_t data_size; +}; + +struct json_value_s *json_extract_value(const struct json_value_s *value) { + return json_extract_value_ex(value, json_null, json_null); +} + +json_weak struct json_extract_result_s +json_extract_get_number_size(const struct json_number_s *const number); +json_weak struct json_extract_result_s +json_extract_get_string_size(const struct json_string_s *const string); +json_weak struct json_extract_result_s +json_extract_get_object_size(const struct json_object_s *const object); +json_weak struct json_extract_result_s +json_extract_get_array_size(const struct json_array_s *const array); +json_weak struct json_extract_result_s +json_extract_get_value_size(const struct json_value_s *const value); + +struct json_extract_result_s +json_extract_get_number_size(const struct json_number_s *const number) { + struct json_extract_result_s result; + result.dom_size = sizeof(struct json_number_s); + result.data_size = number->number_size; + return result; +} + +struct json_extract_result_s 
+json_extract_get_string_size(const struct json_string_s *const string) { + struct json_extract_result_s result; + result.dom_size = sizeof(struct json_string_s); + result.data_size = string->string_size + 1; + return result; +} + +struct json_extract_result_s +json_extract_get_object_size(const struct json_object_s *const object) { + struct json_extract_result_s result; + size_t i; + const struct json_object_element_s *element = object->start; + + result.dom_size = sizeof(struct json_object_s) + + (sizeof(struct json_object_element_s) * object->length); + result.data_size = 0; + + for (i = 0; i < object->length; i++) { + const struct json_extract_result_s string_result = + json_extract_get_string_size(element->name); + const struct json_extract_result_s value_result = + json_extract_get_value_size(element->value); + + result.dom_size += string_result.dom_size; + result.data_size += string_result.data_size; + + result.dom_size += value_result.dom_size; + result.data_size += value_result.data_size; + + element = element->next; + } + + return result; +} + +struct json_extract_result_s +json_extract_get_array_size(const struct json_array_s *const array) { + struct json_extract_result_s result; + size_t i; + const struct json_array_element_s *element = array->start; + + result.dom_size = sizeof(struct json_array_s) + + (sizeof(struct json_array_element_s) * array->length); + result.data_size = 0; + + for (i = 0; i < array->length; i++) { + const struct json_extract_result_s value_result = + json_extract_get_value_size(element->value); + + result.dom_size += value_result.dom_size; + result.data_size += value_result.data_size; + + element = element->next; + } + + return result; +} + +struct json_extract_result_s +json_extract_get_value_size(const struct json_value_s *const value) { + struct json_extract_result_s result = {0, 0}; + + switch (value->type) { + default: + break; + case json_type_object: + result = json_extract_get_object_size( + (const struct json_object_s 
*)value->payload); + break; + case json_type_array: + result = json_extract_get_array_size( + (const struct json_array_s *)value->payload); + break; + case json_type_number: + result = json_extract_get_number_size( + (const struct json_number_s *)value->payload); + break; + case json_type_string: + result = json_extract_get_string_size( + (const struct json_string_s *)value->payload); + break; + } + + result.dom_size += sizeof(struct json_value_s); + + return result; +} + +struct json_extract_state_s { + char *dom; + char *data; +}; + +json_weak void json_extract_copy_value(struct json_extract_state_s *const state, + const struct json_value_s *const value); +void json_extract_copy_value(struct json_extract_state_s *const state, + const struct json_value_s *const value) { + struct json_string_s *string; + struct json_number_s *number; + struct json_object_s *object; + struct json_array_s *array; + struct json_value_s *new_value; + + memcpy(state->dom, value, sizeof(struct json_value_s)); + new_value = (struct json_value_s *)state->dom; + state->dom += sizeof(struct json_value_s); + new_value->payload = state->dom; + + if (json_type_string == value->type) { + memcpy(state->dom, value->payload, sizeof(struct json_string_s)); + string = (struct json_string_s *)state->dom; + state->dom += sizeof(struct json_string_s); + + memcpy(state->data, string->string, string->string_size + 1); + string->string = state->data; + state->data += string->string_size + 1; + } else if (json_type_number == value->type) { + memcpy(state->dom, value->payload, sizeof(struct json_number_s)); + number = (struct json_number_s *)state->dom; + state->dom += sizeof(struct json_number_s); + + memcpy(state->data, number->number, number->number_size); + number->number = state->data; + state->data += number->number_size; + } else if (json_type_object == value->type) { + struct json_object_element_s *element; + size_t i; + + memcpy(state->dom, value->payload, sizeof(struct json_object_s)); + object = 
(struct json_object_s *)state->dom; + state->dom += sizeof(struct json_object_s); + + element = object->start; + object->start = (struct json_object_element_s *)state->dom; + + for (i = 0; i < object->length; i++) { + struct json_value_s *previous_value; + struct json_object_element_s *previous_element; + + memcpy(state->dom, element, sizeof(struct json_object_element_s)); + element = (struct json_object_element_s *)state->dom; + state->dom += sizeof(struct json_object_element_s); + + string = element->name; + memcpy(state->dom, string, sizeof(struct json_string_s)); + string = (struct json_string_s *)state->dom; + state->dom += sizeof(struct json_string_s); + element->name = string; + + memcpy(state->data, string->string, string->string_size + 1); + string->string = state->data; + state->data += string->string_size + 1; + + previous_value = element->value; + element->value = (struct json_value_s *)state->dom; + json_extract_copy_value(state, previous_value); + + previous_element = element; + element = element->next; + + if (element) { + previous_element->next = (struct json_object_element_s *)state->dom; + } + } + } else if (json_type_array == value->type) { + struct json_array_element_s *element; + size_t i; + + memcpy(state->dom, value->payload, sizeof(struct json_array_s)); + array = (struct json_array_s *)state->dom; + state->dom += sizeof(struct json_array_s); + + element = array->start; + array->start = (struct json_array_element_s *)state->dom; + + for (i = 0; i < array->length; i++) { + struct json_value_s *previous_value; + struct json_array_element_s *previous_element; + + memcpy(state->dom, element, sizeof(struct json_array_element_s)); + element = (struct json_array_element_s *)state->dom; + state->dom += sizeof(struct json_array_element_s); + + previous_value = element->value; + element->value = (struct json_value_s *)state->dom; + json_extract_copy_value(state, previous_value); + + previous_element = element; + element = element->next; + + if 
(element) { + previous_element->next = (struct json_array_element_s *)state->dom; + } + } + } +} + +struct json_value_s *json_extract_value_ex(const struct json_value_s *value, + void *(*alloc_func_ptr)(void *, + size_t), + void *user_data) { + void *allocation; + struct json_extract_result_s result; + struct json_extract_state_s state; + size_t total_size; + + if (json_null == value) { + /* invalid value was null! */ + return json_null; + } + + result = json_extract_get_value_size(value); + total_size = result.dom_size + result.data_size; + + if (json_null == alloc_func_ptr) { + allocation = malloc(total_size); + } else { + allocation = alloc_func_ptr(user_data, total_size); + } + + state.dom = (char *)allocation; + state.data = state.dom + result.dom_size; + + json_extract_copy_value(&state, value); + + return (struct json_value_s *)allocation; +} + +struct json_string_s *json_value_as_string(struct json_value_s *const value) { + if (value->type != json_type_string) { + return json_null; + } + + return (struct json_string_s *)value->payload; +} + +struct json_number_s *json_value_as_number(struct json_value_s *const value) { + if (value->type != json_type_number) { + return json_null; + } + + return (struct json_number_s *)value->payload; +} + +struct json_object_s *json_value_as_object(struct json_value_s *const value) { + if (value->type != json_type_object) { + return json_null; + } + + return (struct json_object_s *)value->payload; +} + +struct json_array_s *json_value_as_array(struct json_value_s *const value) { + if (value->type != json_type_array) { + return json_null; + } + + return (struct json_array_s *)value->payload; +} + +int json_value_is_true(const struct json_value_s *const value) { + return value->type == json_type_true; +} + +int json_value_is_false(const struct json_value_s *const value) { + return value->type == json_type_false; +} + +int json_value_is_null(const struct json_value_s *const value) { + return value->type == json_type_null; +} + 
+json_weak int +json_write_minified_get_value_size(const struct json_value_s *value, + size_t *size); + +json_weak int json_write_get_number_size(const struct json_number_s *number, + size_t *size); +int json_write_get_number_size(const struct json_number_s *number, + size_t *size) { + json_uintmax_t parsed_number; + size_t i; + + if (number->number_size >= 2) { + switch (number->number[1]) { + default: + break; + case 'x': + case 'X': + /* the number is a json_parse_flags_allow_hexadecimal_numbers hexadecimal + * so we have to do extra work to convert it to a non-hexadecimal for JSON + * output. */ + parsed_number = json_strtoumax(number->number, json_null, 0); + + i = 0; + + while (0 != parsed_number) { + parsed_number /= 10; + i++; + } + + *size += i; + return 0; + } + } + + /* check to see if the number has leading/trailing decimal point. */ + i = 0; + + /* skip any leading '+' or '-'. */ + if ((i < number->number_size) && + (('+' == number->number[i]) || ('-' == number->number[i]))) { + i++; + } + + /* check if we have infinity. */ + if ((i < number->number_size) && ('I' == number->number[i])) { + const char *inf = "Infinity"; + size_t k; + + for (k = i; k < number->number_size; k++) { + const char c = *inf++; + + /* Check if we found the Infinity string! */ + if ('\0' == c) { + break; + } else if (c != number->number[k]) { + break; + } + } + + if ('\0' == *inf) { + /* Inf becomes 1.7976931348623158e308 because JSON can't support it. */ + *size += 22; + + /* if we had a leading '-' we need to record it in the JSON output. */ + if ('-' == number->number[0]) { + *size += 1; + } + } + + return 0; + } + + /* check if we have nan. */ + if ((i < number->number_size) && ('N' == number->number[i])) { + const char *nan = "NaN"; + size_t k; + + for (k = i; k < number->number_size; k++) { + const char c = *nan++; + + /* Check if we found the NaN string! 
*/ + if ('\0' == c) { + break; + } else if (c != number->number[k]) { + break; + } + } + + if ('\0' == *nan) { + /* NaN becomes 1 because JSON can't support it. */ + *size += 1; + + return 0; + } + } + + /* if we had a leading decimal point. */ + if ((i < number->number_size) && ('.' == number->number[i])) { + /* 1 + because we had a leading decimal point. */ + *size += 1; + goto cleanup; + } + + for (; i < number->number_size; i++) { + const char c = number->number[i]; + if (!('0' <= c && c <= '9')) { + break; + } + } + + /* if we had a trailing decimal point. */ + if ((i + 1 == number->number_size) && ('.' == number->number[i])) { + /* 1 + because we had a trailing decimal point. */ + *size += 1; + goto cleanup; + } + +cleanup: + *size += number->number_size; /* the actual string of the number. */ + + /* if we had a leading '+' we don't record it in the JSON output. */ + if ('+' == number->number[0]) { + *size -= 1; + } + + return 0; +} + +json_weak int json_write_get_string_size(const struct json_string_s *string, + size_t *size); +int json_write_get_string_size(const struct json_string_s *string, + size_t *size) { + size_t i; + for (i = 0; i < string->string_size; i++) { + switch (string->string[i]) { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + *size += 2; + break; + default: + *size += 1; + break; + } + } + + *size += 2; /* need to encode the surrounding '"' characters. */ + + return 0; +} + +json_weak int +json_write_minified_get_array_size(const struct json_array_s *array, + size_t *size); +int json_write_minified_get_array_size(const struct json_array_s *array, + size_t *size) { + struct json_array_element_s *element; + + *size += 2; /* '[' and ']'. */ + + if (1 < array->length) { + *size += array->length - 1; /* ','s seperate each element. 
*/ + } + + for (element = array->start; json_null != element; element = element->next) { + if (json_write_minified_get_value_size(element->value, size)) { + /* value was malformed! */ + return 1; + } + } + + return 0; +} + +json_weak int +json_write_minified_get_object_size(const struct json_object_s *object, + size_t *size); +int json_write_minified_get_object_size(const struct json_object_s *object, + size_t *size) { + struct json_object_element_s *element; + + *size += 2; /* '{' and '}'. */ + + *size += object->length; /* ':'s seperate each name/value pair. */ + + if (1 < object->length) { + *size += object->length - 1; /* ','s seperate each element. */ + } + + for (element = object->start; json_null != element; element = element->next) { + if (json_write_get_string_size(element->name, size)) { + /* string was malformed! */ + return 1; + } + + if (json_write_minified_get_value_size(element->value, size)) { + /* value was malformed! */ + return 1; + } + } + + return 0; +} + +json_weak int +json_write_minified_get_value_size(const struct json_value_s *value, + size_t *size); +int json_write_minified_get_value_size(const struct json_value_s *value, + size_t *size) { + switch (value->type) { + default: + /* unknown value type found! */ + return 1; + case json_type_number: + return json_write_get_number_size((struct json_number_s *)value->payload, + size); + case json_type_string: + return json_write_get_string_size((struct json_string_s *)value->payload, + size); + case json_type_array: + return json_write_minified_get_array_size( + (struct json_array_s *)value->payload, size); + case json_type_object: + return json_write_minified_get_object_size( + (struct json_object_s *)value->payload, size); + case json_type_true: + *size += 4; /* the string "true". */ + return 0; + case json_type_false: + *size += 5; /* the string "false". */ + return 0; + case json_type_null: + *size += 4; /* the string "null". 
*/ + return 0; + } +} + +json_weak char *json_write_minified_value(const struct json_value_s *value, + char *data); + +json_weak char *json_write_number(const struct json_number_s *number, + char *data); +char *json_write_number(const struct json_number_s *number, char *data) { + json_uintmax_t parsed_number, backup; + size_t i; + + if (number->number_size >= 2) { + switch (number->number[1]) { + default: + break; + case 'x': + case 'X': + /* The number is a json_parse_flags_allow_hexadecimal_numbers hexadecimal + * so we have to do extra work to convert it to a non-hexadecimal for JSON + * output. */ + parsed_number = json_strtoumax(number->number, json_null, 0); + + /* We need a copy of parsed number twice, so take a backup of it. */ + backup = parsed_number; + + i = 0; + + while (0 != parsed_number) { + parsed_number /= 10; + i++; + } + + /* Restore parsed_number to its original value stored in the backup. */ + parsed_number = backup; + + /* Now use backup to take a copy of i, or the length of the string. */ + backup = i; + + do { + *(data + i - 1) = '0' + (char)(parsed_number % 10); + parsed_number /= 10; + i--; + } while (0 != parsed_number); + + data += backup; + + return data; + } + } + + /* check to see if the number has leading/trailing decimal point. */ + i = 0; + + /* skip any leading '-'. */ + if ((i < number->number_size) && + (('+' == number->number[i]) || ('-' == number->number[i]))) { + i++; + } + + /* check if we have infinity. */ + if ((i < number->number_size) && ('I' == number->number[i])) { + const char *inf = "Infinity"; + size_t k; + + for (k = i; k < number->number_size; k++) { + const char c = *inf++; + + /* Check if we found the Infinity string! */ + if ('\0' == c) { + break; + } else if (c != number->number[k]) { + break; + } + } + + if ('\0' == *inf++) { + const char *dbl_max; + + /* if we had a leading '-' we need to record it in the JSON output. 
*/ + if ('-' == number->number[0]) { + *data++ = '-'; + } + + /* Inf becomes 1.7976931348623158e308 because JSON can't support it. */ + for (dbl_max = "1.7976931348623158e308"; '\0' != *dbl_max; dbl_max++) { + *data++ = *dbl_max; + } + + return data; + } + } + + /* check if we have nan. */ + if ((i < number->number_size) && ('N' == number->number[i])) { + const char *nan = "NaN"; + size_t k; + + for (k = i; k < number->number_size; k++) { + const char c = *nan++; + + /* Check if we found the NaN string! */ + if ('\0' == c) { + break; + } else if (c != number->number[k]) { + break; + } + } + + if ('\0' == *nan++) { + /* NaN becomes 0 because JSON can't support it. */ + *data++ = '0'; + return data; + } + } + + /* if we had a leading decimal point. */ + if ((i < number->number_size) && ('.' == number->number[i])) { + i = 0; + + /* skip any leading '+'. */ + if ('+' == number->number[i]) { + i++; + } + + /* output the leading '-' if we had one. */ + if ('-' == number->number[i]) { + *data++ = '-'; + i++; + } + + /* insert a '0' to fix the leading decimal point for JSON output. */ + *data++ = '0'; + + /* and output the rest of the number as normal. */ + for (; i < number->number_size; i++) { + *data++ = number->number[i]; + } + + return data; + } + + for (; i < number->number_size; i++) { + const char c = number->number[i]; + if (!('0' <= c && c <= '9')) { + break; + } + } + + /* if we had a trailing decimal point. */ + if ((i + 1 == number->number_size) && ('.' == number->number[i])) { + i = 0; + + /* skip any leading '+'. */ + if ('+' == number->number[i]) { + i++; + } + + /* output the leading '-' if we had one. */ + if ('-' == number->number[i]) { + *data++ = '-'; + i++; + } + + /* and output the rest of the number as normal. */ + for (; i < number->number_size; i++) { + *data++ = number->number[i]; + } + + /* insert a '0' to fix the trailing decimal point for JSON output. */ + *data++ = '0'; + + return data; + } + + i = 0; + + /* skip any leading '+'. 
*/ + if ('+' == number->number[i]) { + i++; + } + + for (; i < number->number_size; i++) { + *data++ = number->number[i]; + } + + return data; +} + +json_weak char *json_write_string(const struct json_string_s *string, + char *data); +char *json_write_string(const struct json_string_s *string, char *data) { + size_t i; + + *data++ = '"'; /* open the string. */ + + for (i = 0; i < string->string_size; i++) { + switch (string->string[i]) { + case '"': + *data++ = '\\'; /* escape the control character. */ + *data++ = '"'; + break; + case '\\': + *data++ = '\\'; /* escape the control character. */ + *data++ = '\\'; + break; + case '\b': + *data++ = '\\'; /* escape the control character. */ + *data++ = 'b'; + break; + case '\f': + *data++ = '\\'; /* escape the control character. */ + *data++ = 'f'; + break; + case '\n': + *data++ = '\\'; /* escape the control character. */ + *data++ = 'n'; + break; + case '\r': + *data++ = '\\'; /* escape the control character. */ + *data++ = 'r'; + break; + case '\t': + *data++ = '\\'; /* escape the control character. */ + *data++ = 't'; + break; + default: + *data++ = string->string[i]; + break; + } + } + + *data++ = '"'; /* close the string. */ + + return data; +} + +json_weak char *json_write_minified_array(const struct json_array_s *array, + char *data); +char *json_write_minified_array(const struct json_array_s *array, char *data) { + struct json_array_element_s *element = json_null; + + *data++ = '['; /* open the array. */ + + for (element = array->start; json_null != element; element = element->next) { + if (element != array->start) { + *data++ = ','; /* ','s seperate each element. */ + } + + data = json_write_minified_value(element->value, data); + + if (json_null == data) { + /* value was malformed! */ + return json_null; + } + } + + *data++ = ']'; /* close the array. 
*/ + + return data; +} + +json_weak char *json_write_minified_object(const struct json_object_s *object, + char *data); +char *json_write_minified_object(const struct json_object_s *object, + char *data) { + struct json_object_element_s *element = json_null; + + *data++ = '{'; /* open the object. */ + + for (element = object->start; json_null != element; element = element->next) { + if (element != object->start) { + *data++ = ','; /* ','s seperate each element. */ + } + + data = json_write_string(element->name, data); + + if (json_null == data) { + /* string was malformed! */ + return json_null; + } + + *data++ = ':'; /* ':'s seperate each name/value pair. */ + + data = json_write_minified_value(element->value, data); + + if (json_null == data) { + /* value was malformed! */ + return json_null; + } + } + + *data++ = '}'; /* close the object. */ + + return data; +} + +json_weak char *json_write_minified_value(const struct json_value_s *value, + char *data); +char *json_write_minified_value(const struct json_value_s *value, char *data) { + switch (value->type) { + default: + /* unknown value type found! 
*/ + return json_null; + case json_type_number: + return json_write_number((struct json_number_s *)value->payload, data); + case json_type_string: + return json_write_string((struct json_string_s *)value->payload, data); + case json_type_array: + return json_write_minified_array((struct json_array_s *)value->payload, + data); + case json_type_object: + return json_write_minified_object((struct json_object_s *)value->payload, + data); + case json_type_true: + data[0] = 't'; + data[1] = 'r'; + data[2] = 'u'; + data[3] = 'e'; + return data + 4; + case json_type_false: + data[0] = 'f'; + data[1] = 'a'; + data[2] = 'l'; + data[3] = 's'; + data[4] = 'e'; + return data + 5; + case json_type_null: + data[0] = 'n'; + data[1] = 'u'; + data[2] = 'l'; + data[3] = 'l'; + return data + 4; + } +} + +void *json_write_minified(const struct json_value_s *value, size_t *out_size) { + size_t size = 0; + char *data = json_null; + char *data_end = json_null; + + if (json_null == value) { + return json_null; + } + + if (json_write_minified_get_value_size(value, &size)) { + /* value was malformed! */ + return json_null; + } + + size += 1; /* for the '\0' null terminating character. */ + + data = (char *)malloc(size); + + if (json_null == data) { + /* malloc failed! */ + return json_null; + } + + data_end = json_write_minified_value(value, data); + + if (json_null == data_end) { + /* bad chi occurred! */ + free(data); + return json_null; + } + + /* null terminated the string. 
*/ + *data_end = '\0'; + + if (json_null != out_size) { + *out_size = size; + } + + return data; +} + +json_weak int json_write_pretty_get_value_size(const struct json_value_s *value, + size_t depth, size_t indent_size, + size_t newline_size, + size_t *size); + +json_weak int json_write_pretty_get_array_size(const struct json_array_s *array, + size_t depth, size_t indent_size, + size_t newline_size, + size_t *size); +int json_write_pretty_get_array_size(const struct json_array_s *array, + size_t depth, size_t indent_size, + size_t newline_size, size_t *size) { + struct json_array_element_s *element; + + *size += 1; /* '['. */ + + if (0 < array->length) { + /* if we have any elements we need to add a newline after our '['. */ + *size += newline_size; + + *size += array->length - 1; /* ','s seperate each element. */ + + for (element = array->start; json_null != element; + element = element->next) { + /* each element gets an indent. */ + *size += (depth + 1) * indent_size; + + if (json_write_pretty_get_value_size(element->value, depth + 1, + indent_size, newline_size, size)) { + /* value was malformed! */ + return 1; + } + + /* each element gets a newline too. */ + *size += newline_size; + } + + /* since we wrote out some elements, need to add a newline and indentation. + */ + /* to the trailing ']'. */ + *size += depth * indent_size; + } + + *size += 1; /* ']'. */ + + return 0; +} + +json_weak int +json_write_pretty_get_object_size(const struct json_object_s *object, + size_t depth, size_t indent_size, + size_t newline_size, size_t *size); +int json_write_pretty_get_object_size(const struct json_object_s *object, + size_t depth, size_t indent_size, + size_t newline_size, size_t *size) { + struct json_object_element_s *element; + + *size += 1; /* '{'. */ + + if (0 < object->length) { + *size += newline_size; /* need a newline next. */ + + *size += object->length - 1; /* ','s seperate each element. 
*/ + + for (element = object->start; json_null != element; + element = element->next) { + /* each element gets an indent and newline. */ + *size += (depth + 1) * indent_size; + *size += newline_size; + + if (json_write_get_string_size(element->name, size)) { + /* string was malformed! */ + return 1; + } + + *size += 3; /* seperate each name/value pair with " : ". */ + + if (json_write_pretty_get_value_size(element->value, depth + 1, + indent_size, newline_size, size)) { + /* value was malformed! */ + return 1; + } + } + + *size += depth * indent_size; + } + + *size += 1; /* '}'. */ + + return 0; +} + +json_weak int json_write_pretty_get_value_size(const struct json_value_s *value, + size_t depth, size_t indent_size, + size_t newline_size, + size_t *size); +int json_write_pretty_get_value_size(const struct json_value_s *value, + size_t depth, size_t indent_size, + size_t newline_size, size_t *size) { + switch (value->type) { + default: + /* unknown value type found! */ + return 1; + case json_type_number: + return json_write_get_number_size((struct json_number_s *)value->payload, + size); + case json_type_string: + return json_write_get_string_size((struct json_string_s *)value->payload, + size); + case json_type_array: + return json_write_pretty_get_array_size( + (struct json_array_s *)value->payload, depth, indent_size, newline_size, + size); + case json_type_object: + return json_write_pretty_get_object_size( + (struct json_object_s *)value->payload, depth, indent_size, + newline_size, size); + case json_type_true: + *size += 4; /* the string "true". */ + return 0; + case json_type_false: + *size += 5; /* the string "false". */ + return 0; + case json_type_null: + *size += 4; /* the string "null". 
*/ + return 0; + } +} + +json_weak char *json_write_pretty_value(const struct json_value_s *value, + size_t depth, const char *indent, + const char *newline, char *data); + +json_weak char *json_write_pretty_array(const struct json_array_s *array, + size_t depth, const char *indent, + const char *newline, char *data); +char *json_write_pretty_array(const struct json_array_s *array, size_t depth, + const char *indent, const char *newline, + char *data) { + size_t k, m; + struct json_array_element_s *element; + + *data++ = '['; /* open the array. */ + + if (0 < array->length) { + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + + for (element = array->start; json_null != element; + element = element->next) { + if (element != array->start) { + *data++ = ','; /* ','s seperate each element. */ + + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + } + + for (k = 0; k < depth + 1; k++) { + for (m = 0; '\0' != indent[m]; m++) { + *data++ = indent[m]; + } + } + + data = json_write_pretty_value(element->value, depth + 1, indent, newline, + data); + + if (json_null == data) { + /* value was malformed! */ + return json_null; + } + } + + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + + for (k = 0; k < depth; k++) { + for (m = 0; '\0' != indent[m]; m++) { + *data++ = indent[m]; + } + } + } + + *data++ = ']'; /* close the array. */ + + return data; +} + +json_weak char *json_write_pretty_object(const struct json_object_s *object, + size_t depth, const char *indent, + const char *newline, char *data); +char *json_write_pretty_object(const struct json_object_s *object, size_t depth, + const char *indent, const char *newline, + char *data) { + size_t k, m; + struct json_object_element_s *element; + + *data++ = '{'; /* open the object. 
*/ + + if (0 < object->length) { + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + + for (element = object->start; json_null != element; + element = element->next) { + if (element != object->start) { + *data++ = ','; /* ','s seperate each element. */ + + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + } + + for (k = 0; k < depth + 1; k++) { + for (m = 0; '\0' != indent[m]; m++) { + *data++ = indent[m]; + } + } + + data = json_write_string(element->name, data); + + if (json_null == data) { + /* string was malformed! */ + return json_null; + } + + /* " : "s seperate each name/value pair. */ + *data++ = ' '; + *data++ = ':'; + *data++ = ' '; + + data = json_write_pretty_value(element->value, depth + 1, indent, newline, + data); + + if (json_null == data) { + /* value was malformed! */ + return json_null; + } + } + + for (k = 0; '\0' != newline[k]; k++) { + *data++ = newline[k]; + } + + for (k = 0; k < depth; k++) { + for (m = 0; '\0' != indent[m]; m++) { + *data++ = indent[m]; + } + } + } + + *data++ = '}'; /* close the object. */ + + return data; +} + +json_weak char *json_write_pretty_value(const struct json_value_s *value, + size_t depth, const char *indent, + const char *newline, char *data); +char *json_write_pretty_value(const struct json_value_s *value, size_t depth, + const char *indent, const char *newline, + char *data) { + switch (value->type) { + default: + /* unknown value type found! 
*/ + return json_null; + case json_type_number: + return json_write_number((struct json_number_s *)value->payload, data); + case json_type_string: + return json_write_string((struct json_string_s *)value->payload, data); + case json_type_array: + return json_write_pretty_array((struct json_array_s *)value->payload, depth, + indent, newline, data); + case json_type_object: + return json_write_pretty_object((struct json_object_s *)value->payload, + depth, indent, newline, data); + case json_type_true: + data[0] = 't'; + data[1] = 'r'; + data[2] = 'u'; + data[3] = 'e'; + return data + 4; + case json_type_false: + data[0] = 'f'; + data[1] = 'a'; + data[2] = 'l'; + data[3] = 's'; + data[4] = 'e'; + return data + 5; + case json_type_null: + data[0] = 'n'; + data[1] = 'u'; + data[2] = 'l'; + data[3] = 'l'; + return data + 4; + } +} + +void *json_write_pretty(const struct json_value_s *value, const char *indent, + const char *newline, size_t *out_size) { + size_t size = 0; + size_t indent_size = 0; + size_t newline_size = 0; + char *data = json_null; + char *data_end = json_null; + + if (json_null == value) { + return json_null; + } + + if (json_null == indent) { + indent = " "; /* default to two spaces. */ + } + + if (json_null == newline) { + newline = "\n"; /* default to linux newlines. */ + } + + while ('\0' != indent[indent_size]) { + ++indent_size; /* skip non-null terminating characters. */ + } + + while ('\0' != newline[newline_size]) { + ++newline_size; /* skip non-null terminating characters. */ + } + + if (json_write_pretty_get_value_size(value, 0, indent_size, newline_size, + &size)) { + /* value was malformed! */ + return json_null; + } + + size += 1; /* for the '\0' null terminating character. */ + + data = (char *)malloc(size); + + if (json_null == data) { + /* malloc failed! */ + return json_null; + } + + data_end = json_write_pretty_value(value, 0, indent, newline, data); + + if (json_null == data_end) { + /* bad chi occurred! 
*/ + free(data); + return json_null; + } + + /* null terminated the string. */ + *data_end = '\0'; + + if (json_null != out_size) { + *out_size = size; + } + + return data; +} + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif /* SHEREDOM_JSON_H_INCLUDED. */ diff --git a/fastbril/src/main.c b/fastbril/src/main.c new file mode 100644 index 000000000..45c064913 --- /dev/null +++ b/fastbril/src/main.c @@ -0,0 +1,149 @@ +#include +#include +#include +#include + +#include "bril-insns/instrs.h" +#include "byte-io.h" +#include "interp/interp.h" +#include "libs/json.h" +#include "parser.h" +#include "pretty-printer.h" +// #include "emission.h" +#include "asm/asm.h" +#include "asm/linear-scan.h" +#include "asm/to_abstract_asm.h" +#include "asm/trivial-regalloc.h" + +/* Bit masks for cmd flags/modes */ +#define OUTPUT_BYTECODE 0x0001 +#define COUNT_INSNS 0x0002 +#define NO_INTERPRET 0x0004 +#define PRINT_OUT 0x0008 +#define EMIT_ASM 0x0010 +#define READ_BYTECODE 0x0020 + +/** + * read the contents of stdin and return a single heap allocated string. 
+ */ +char *get_stdin() +{ + size_t buf_len = 128; + char *buffer = malloc(buf_len); + size_t i = 0; + while (true) + { + if (i == buf_len - 1) + { + buf_len *= 2; + buffer = realloc(buffer, buf_len); + } + int c = getchar(); + if (c == EOF) + { + buffer[i] = 0x00; + break; + } + buffer[i] = c; + ++i; + } + return buffer; +} + +/** + * turn a string into a value_t that can be used by the interpreter + */ +value_t parse_argument(const char *str, briltp expected) +{ + switch (expected) + { + case BRILINT: + return (value_t){.int_val = strtol(str, 0, 0)}; + case BRILBOOL: + if (strcmp(str, "true") == 0) + return (value_t){.int_val = 1}; + else + return (value_t){.int_val = 0}; + case BRILFLOAT: + return (value_t){.float_val = strtod(str, 0)}; + default: + return (value_t){.int_val = 0}; + } +} + +int main(int argc, char **argv) +{ + long options = 0; + char *bout_file = 0, *out_file = 0; + char *args_strs[argc]; + size_t argidx = 0; + for (int i = 1; i < argc; ++i) + { + if (strcmp(argv[i], "-p") == 0) + options |= COUNT_INSNS; + else if (strcmp(argv[i], "-b") == 0) + options |= READ_BYTECODE; + else if (strcmp(argv[i], "-bo") == 0) + { + options |= OUTPUT_BYTECODE; + bout_file = i + 1 < argc ? argv[++i] : 0; + } + else if (strcmp(argv[i], "-pr") == 0) + options |= PRINT_OUT; + else if (strcmp(argv[i], "-ni") == 0) + options |= NO_INTERPRET; + else if (strcmp(argv[i], "-e") == 0) + { + options |= EMIT_ASM; + out_file = i + 1 < argc ? argv[++i] : 0; + } + else + { + args_strs[argidx++] = argv[i]; + } + } + program_t *prog; + char *string = 0; + struct json_value_s *root = 0; + if (options & READ_BYTECODE) + prog = read_program(stdin); + else + { + string = get_stdin(); + root = json_parse(string, strlen(string)); + struct json_object_s *functions = root->payload; + prog = parse_program(functions); + } + if (options & OUTPUT_BYTECODE) + { + FILE *f = fopen(bout_file ? 
bout_file : "my-output", "w+"); + output_program(prog, f); + fclose(f); + } + if (!(options & NO_INTERPRET)) + { + value_t args[argidx]; + briltp *tps = get_main_types(prog); + if (!tps) + return 1; + for (size_t i = 0; i < argidx; ++i) + args[i] = parse_argument(args_strs[i], tps[i]); + + interp_main(prog, args, argidx, options & COUNT_INSNS); + } + if (options & PRINT_OUT) + format_program(stdout, prog); + if (options & EMIT_ASM) + { + FILE *f = fopen(out_file ? out_file : "output.s", "w+"); + asm_prog_t p = bytecode_to_abs_asm(prog); + asm_prog_t allocd = triv_allocate(p); + free_asm_prog(p); + emit_insns(f, &allocd); + fclose(f); + } + free(string); + free(root); + free_program(prog); + return 0; +} diff --git a/fastbril/src/parser.c b/fastbril/src/parser.c new file mode 100644 index 000000000..73d95ba43 --- /dev/null +++ b/fastbril/src/parser.c @@ -0,0 +1,669 @@ +#include "parser.h" +#include "libs/hashmap.h" +#include + +#define TEST_OP(s, ret) if(strcmp(s, str) == 0) { return (ret);} +#define MAKE_HASH_MAP hashmap_new(sizeof(struct string_uint16), 0, 0, 0, \ + hashfun, hash_compare, NULL, NULL); + + +/** + * translate strings into our internal opcode number + */ +static inline uint16_t opcode_of_string(const char *str) +{ + TEST_OP( "nop", NOP); + TEST_OP( "const", CONST); + TEST_OP( "add", ADD); + TEST_OP( "mul", MUL); + TEST_OP( "mul", MUL); + TEST_OP( "sub", SUB); + TEST_OP( "div", DIV); + TEST_OP( "eq", EQ); + TEST_OP( "lt", LT); + TEST_OP( "gt", GT); + TEST_OP( "le", LE); + TEST_OP( "ge", GE); + TEST_OP( "not", NOT); + TEST_OP( "and", AND); + TEST_OP( "or", OR); + TEST_OP( "jmp", JMP); + TEST_OP( "br", BR); + TEST_OP( "call", CALL); + TEST_OP( "ret", RET); + TEST_OP( "print", PRINT); + TEST_OP( "phi", PHI); + TEST_OP( "alloc", ALLOC); + TEST_OP( "free", FREE); + TEST_OP( "store", STORE); + TEST_OP( "load", LOAD); + TEST_OP( "ptradd", PTRADD); + TEST_OP( "fadd", FADD); + TEST_OP( "fmul", FMUL); + TEST_OP( "fsub", FSUB); + TEST_OP( "fdiv", FDIV); + 
TEST_OP( "feq", FEQ); + TEST_OP( "flt", FLT); + TEST_OP( "fle", FLE); + TEST_OP( "fgt", FGT); + TEST_OP( "fge", FGE); + TEST_OP( "id", ID); + return 0xffff; +} + +static inline uint16_t type_of_string(const char *str) +{ + TEST_OP( "int", BRILINT); + TEST_OP( "bool", BRILBOOL); + TEST_OP( "float", BRILFLOAT); + return 0xffff; +} + +/** + * take value, which is in json form, and convert it to a type. + * undefined behaviour if not actually a type + */ +static inline uint16_t type_of_json_value(struct json_value_s *value) +{ + if(value == 0) + return BRILVOID; + uint16_t ptr_depth = 0; + while(value->type == json_type_object) + { + if(ptr_depth >= 1 << 14) + { + fprintf(stderr, "pointers nested too deep\n"); + exit(1); + } + ++ptr_depth; + value = json_value_as_object(value)->start->value; + } + uint16_t base_tp = type_of_string(json_value_as_string(value)->string); + return ptr_depth << 2 | base_tp; +} + +/** + * boilerplate to make the hashmap work properly. No need to understand this. + */ +typedef struct string_uint16 +{ + const char *str; + uint16_t num; +} hashdat; + +uint64_t hashfun(const void *item, uint64_t seed0, uint64_t seed1) { + const struct string_uint16 *val = item; + return hashmap_sip(val->str, strlen(val->str), seed0, seed1); +} + +int hash_compare(const void *a, const void *b, void *udata) { + const struct string_uint16 *ua = a; + const struct string_uint16 *ub = b; + return strcmp(ua->str, ub->str); +} + +/** + * Parse a temp value (variable) from the json tmp. + * tmp_map is the current mapping from names -> numbers, as internally we + * represent temps as numbers. If tmp is already mapped, we will return that, + * but if it isn't we will map it to *num_tmps, and then increment this + * variable to reflect the new number of mapped temps. 
+ */ +static inline uint16_t parse_temp(struct json_value_s *tmp, + struct hashmap *tmp_map, + uint16_t *num_tmps) +{ + const char *nm = json_value_as_string(tmp)->string; + hashdat *precomped = hashmap_get(tmp_map, &(hashdat){.str = nm}); + if (precomped) + { + //printf("found %s -> %d\n", nm, precomped->num); + return precomped->num; + } else + { + if(*num_tmps + 1 == 0xffff) + { + fprintf(stderr, "too many variables!!!\n"); + exit(1); + } + uint16_t tmp = *num_tmps; + *num_tmps = *num_tmps + 1; + hashmap_set(tmp_map, &(hashdat){.str = nm, .num = tmp}); + return tmp; + } +} + + +/** + * take a label in the 1st representation (indexed by order seen) and + * turn it into the final representation (instruction index). + * see below for details. + */ +static inline uint16_t translate_label(struct hashmap *lbl_map, + const char **idx_to_lbl, uint16_t old_lbl) +{ + return ((hashdat*) hashmap_get(lbl_map, &(hashdat) + {.str = idx_to_lbl[old_lbl]}))->num; +} + +/** + * parse the json value lbl as a label. Update appropriate state as necessary. + */ +static inline uint16_t parse_lbls(struct json_value_s *lbl, + struct hashmap *prt_lbl_map, + const char **idx_to_lbl, + uint16_t *num_lbls) +{ + const char *nm = json_value_as_string(lbl)->string; + hashdat *precomped = hashmap_get(prt_lbl_map, &(hashdat){.str = nm}); + if (precomped) + { + return precomped->num; + } else + { + uint16_t new_lbl = (*num_lbls)++; + hashmap_set(prt_lbl_map, &(hashdat){.str = nm, .num = new_lbl}); + idx_to_lbl[new_lbl] = nm; + return new_lbl; + } +} + + +/** + * Parse a single instruction out of json. + * put this parsed instruction into the array insns[dest], resizing as necessary + * insn_length is the length of the array insns. 
+ * next_labelled is whether the next instruction is tagged by a label + * (needed for ssa) + * lbl_map is a map from label names to their indices in insns + * tmp_map is a map from temp names to their number representations + * prt_lbl_map is a map from label names to numbers which represent the order + * in which labels were encountered. We use this since when we encounter a jump + * to a label, we can't know the actual representation of this label until + * later, so we need to put in a filler value to be replaced later + * idx_to_lbl is the inverse of prt_lbl_map + * num_lbls, num_tmps are what they sound like + * tmp_types is a mapping from the internal rep of tmps to their types + */ +size_t parse_instruction(struct json_object_s *json, + instruction_t **insns, + size_t dest, + size_t *insn_length, + uint16_t *next_labelled, + struct hashmap *lbl_map, + struct hashmap *tmp_map, + struct hashmap *prt_lbl_map, + struct hashmap *fun_name_to_idx, + const char **idx_to_lbl, + uint16_t *num_lbls, + uint16_t *num_tmps, + uint16_t *tmp_types + ) +/* when see label: string -> instruction index */ +/* when see jmp lbl: prt_lbl_map[lbl] || {prt_lbl_map[lbl] = *num_lbls;*/ +/* idx_to_lbl[*num_lbls] = lbl;*/ +/* ++*num_lbls;}*/ +/* when see jmp: (string, ordered index) */ +{ + + struct json_object_element_s *field = json->start; + uint16_t tagged_opcode = 0xffff; + uint16_t opcode = 0; + uint16_t insn_dest = 0xffff; + bool is_label = false; + uint16_t *args = 0; + + size_t numargs = 0; + size_t num_cur_lbls = 0; + uint16_t *lbls = 0; + uint16_t type = 0xffff; + const char *value = 0; + const char *fun_nm = 0; + + if(dest + 1 == 0xffff) + { + fprintf(stderr, "too many instructions. please modularize your code\n"); + exit(1); + } + while(field) + { + if(strcmp(field->name->string, "op") == 0) + { + opcode = opcode_of_string(json_value_as_string(field->value)->string); + tagged_opcode = opcode | (*next_labelled ? 
0x8000 : 0); + } else if (strcmp(field->name->string, "label") == 0) + { + is_label = true; + const char *nm = json_value_as_string(field->value)->string; + hashmap_set(lbl_map, &(hashdat){.str = nm, .num = dest}); + } else if (strcmp(field->name->string, "dest") == 0) + { + insn_dest = parse_temp(field->value, tmp_map, num_tmps); + } else if (strcmp(field->name->string, "args") == 0) + { + struct json_array_s *arr = field->value->payload; + numargs = arr->length; + args = malloc(sizeof(uint16_t) * numargs); + struct json_array_element_s *elem = arr->start; + for(int i = 0; i < numargs; ++i) + { + args[i] = parse_temp(elem->value, tmp_map, num_tmps); + elem = elem->next; + } + } else if (strcmp(field->name->string, "labels") == 0) + { + struct json_array_s *arr = field->value->payload; + num_cur_lbls = arr->length; + lbls = malloc(sizeof(uint16_t) * num_cur_lbls); + struct json_array_element_s *elem = arr->start; + for(int i = 0; i < num_cur_lbls; ++i) + { + lbls[i] = parse_lbls(elem->value, prt_lbl_map, idx_to_lbl, num_lbls); + elem = elem->next; + } + } else if (strcmp(field->name->string, "type") == 0) + { + type = type_of_json_value(field->value); + } else if (strcmp(field->name->string, "value") == 0) + { + switch(field->value->type) + { + case json_type_true: + value = "1"; + break; + case json_type_false: + value = "0"; + break; + case json_type_number: + value = json_value_as_number(field->value)->number; + break; + default: + fprintf(stderr, "bad constant. 
exiting\n"); + exit(1); + } + } else if (strcmp(field->name->string, "funcs") == 0) + { + fun_nm = json_value_as_string + (json_value_as_array(field->value)->start->value)->string; + } + field = field->next; + } + /* add this variable to the type map (maybe) */ + if(type != 0xffff) + { + tmp_types[insn_dest] = type; + } + size_t extra_words_needed = 0; + + + /* calculate multi-word instructions*/ + if(opcode == PHI) + { + extra_words_needed = (numargs + 1)/2; + } else if (opcode == PRINT) + { + extra_words_needed = numargs / 2; + } else if (opcode == CALL) + { + extra_words_needed = ((numargs + 3) / 4); + } else if (opcode == CONST && (type == BRILINT || type == BRILBOOL)) + { + int64_t val = strtoll(value, 0, 0); + if ((int64_t) ((int32_t) val) != val) + goto set_long_const; + } else if (opcode == CONST && type == BRILFLOAT) + { + set_long_const: + opcode = LCONST; + tagged_opcode = (tagged_opcode & 0x8000 ? opcode | 0x8000 : opcode); + extra_words_needed = 1; + } + /* realloc when we run out of space */ + if(dest + extra_words_needed >= *insn_length) + { + (*insn_length) *= 2; + *insns = realloc(*insns, *insn_length * sizeof(instruction_t)); + } + + /* do actual emission */ + switch (opcode) + { + case 0: break; + case PHI: + { + (*insns)[dest].phi_inst = (phi_inst_t) + { + .opcode_lbled = tagged_opcode, + .dest = insn_dest, + .num_choices = numargs + }; + for(size_t phi_ext_idx = 0; phi_ext_idx < numargs; phi_ext_idx += 2) + { + phi_extension_t ext; + ext.lbl1 = lbls[phi_ext_idx]; + ext.val1 = args[phi_ext_idx]; + if(phi_ext_idx + 1 < numargs) + { + ext.lbl2 = lbls[phi_ext_idx + 1]; + ext.val2 = args[phi_ext_idx + 1]; + } + (*insns)[dest + (phi_ext_idx/2) + 1].phi_ext = ext; + } + } break; + case PRINT: + { + (*insns)[dest].print_insn = (print_instr_t) + { + .opcode_lbled = tagged_opcode, + .num_prints = numargs, + .arg1 = args[0] + }; + for(size_t xtra_arg = 1; xtra_arg < numargs; xtra_arg += 2) + { + print_args_t pa; + pa.arg1 = args[xtra_arg]; + if(xtra_arg + 
1 < numargs) + pa.arg2 = args[xtra_arg + 1]; + (*insns)[dest + (xtra_arg - 1)/2 + 1].print_args = pa; + } + } break; + case CALL: + { + (*insns)[dest].call_inst = (call_inst_t) + { + .opcode_lbled = tagged_opcode, + .dest = insn_dest, + .num_args = numargs, + .target = ((hashdat*) hashmap_get(fun_name_to_idx, + &(hashdat){.str = fun_nm}))->num, + }; + for(size_t arg = 0; arg < numargs; arg += 4) + { + call_args_t ca; + ca.args[0] = args[arg]; + if(arg + 1 < numargs) + ca.args[1] = args[arg + 1]; + if(arg + 2 < numargs) + ca.args[2] = args[arg + 2]; + if(arg + 3 < numargs) + ca.args[3] = args[arg + 3]; + (*insns)[dest + arg/4 + 1].call_args = ca; + } + } break; + case LCONST: + { + (*insns)[dest].long_const_insn = (long_const_instr_t) + {.opcode_lbled = tagged_opcode, + .dest = insn_dest, + .type = type}; + if(type == BRILFLOAT) + (*insns)[dest + 1].const_ext.float_val = strtod(value, 0); + else + (*insns)[dest + 1].const_ext.int_val = strtoll(value, 0, 0); + } break; + case CONST: + { + (*insns)[dest].const_insn = (const_instr_t) + { + .opcode_lbled = tagged_opcode, + .dest = insn_dest, + .value = (int32_t) strtol(value, 0, 0) + }; + } break; + case BR: + { + (*insns)[dest].br_inst = (br_inst_t) + { + .opcode_lbled = tagged_opcode, + .test = args[0], + .ltrue = lbls[0], + .lfalse = lbls[1] + }; + } break; + case JMP: + { + (*insns)[dest].norm_insn = (norm_instruction_t) + { + .opcode_lbled = tagged_opcode, + .dest = lbls[0], + .arg1 = 0, + .arg2 = 0 + }; + } break; + case ID: + { + (*insns)[dest].norm_insn = (norm_instruction_t) + { + .opcode_lbled = tagged_opcode, + .dest = insn_dest, + .arg1 = args[0], + .arg2 = type + }; + } break; + default: + { + (*insns)[dest].norm_insn = (norm_instruction_t) + { + .opcode_lbled = tagged_opcode, + .dest = insn_dest, + .arg1 = numargs > 0 ? args[0] : 0xffff, + .arg2 = numargs > 1 ? args[1] : 0xffff + }; + } + } + *next_labelled = is_label ? 
1 : 0; + /* tidy up*/ + if(args) + free(args); + if(lbls) + free(lbls); + if(is_label) + return dest; + return dest + 1 + extra_words_needed; +} + +/** + * parse the instructions of json for a single function. + */ +instruction_t *parse_instructions(struct json_array_s* json, + struct hashmap *fun_name_to_idx, + struct hashmap *tmp_map, + uint16_t *num_temps, + uint16_t *tmp_types, + size_t *num_instrs) +{ + size_t insn_len = 32; + instruction_t *insns = malloc(sizeof(instruction_t) * insn_len); + struct hashmap *lbl_map = MAKE_HASH_MAP; + struct hashmap *prt_lbl_map = MAKE_HASH_MAP; + struct json_array_element_s *tmp = json->start; + size_t dest = 0; + uint16_t next_labelled = 0; + uint16_t num_lbls = 0; + const char **idx_to_lbl = malloc(sizeof(char*) * json->length); + for(size_t i = 0; i < json->length; ++i) + { + dest = parse_instruction(json_value_as_object(tmp->value), &insns, + dest, &insn_len, &next_labelled, + lbl_map, tmp_map, prt_lbl_map, fun_name_to_idx, + idx_to_lbl, &num_lbls, num_temps, tmp_types); + tmp = tmp->next; + } + *num_instrs = dest; + /* clean up filler values for labels, and fill in types for print*/ + for(size_t i = 0; i < dest; ++i) + { + instruction_t *insn = insns + i; + switch(get_opcode(*insn)) + { + case JMP: + insn->norm_insn.dest = translate_label(lbl_map, + idx_to_lbl, + insn->norm_insn.dest); + break; + case BR: + insn->br_inst.ltrue = translate_label(lbl_map, + idx_to_lbl, + insn->br_inst.ltrue); + insn->br_inst.lfalse = translate_label(lbl_map, + idx_to_lbl, + insn->br_inst.lfalse); + break; + case PHI: + { + uint16_t num_lbls = insn->phi_inst.num_choices; + for(uint16_t j = 0; j < num_lbls; j += 2) + { + ++i; + instruction_t *phi_ext = insns + i; + phi_ext->phi_ext.lbl1 = translate_label(lbl_map, + idx_to_lbl, + phi_ext->phi_ext.lbl1); + if(j + 1 < num_lbls) + phi_ext->phi_ext.lbl2 = translate_label(lbl_map, + idx_to_lbl, + phi_ext->phi_ext.lbl2); + } + } break; + case PRINT: + { + uint16_t num_args = 
insn->print_insn.num_prints; + insn->print_insn.type1 = tmp_types[insn->print_insn.arg1]; + for(uint16_t j = 0; j < num_args - 1; j += 2) + { + ++i; + instruction_t *args = insns + i; + args->print_args.type1 = tmp_types[args->print_args.arg1]; + if(j + 1 < num_args) + args->print_args.type2 = tmp_types[args->print_args.arg2]; + } + } break; + case CALL: + i += (insn->call_inst.num_args + 3) / 4; + break; + case LCONST: + ++i; + break; + } + } + hashmap_free(lbl_map); + hashmap_free(tmp_map); + hashmap_free(prt_lbl_map); + free(idx_to_lbl); + free(tmp_types); + return insns; +} + +/** + * parse a function from json. + * fun_name_to_idx is a map from function names to their indices in the program. + */ +function_t parse_function(struct json_object_s *json, struct hashmap *fun_name_to_idx) +{ + struct hashmap *tmp_map = MAKE_HASH_MAP; + uint16_t num_temps = 0; + uint16_t num_args = 0; + uint16_t num_instrs = 0; + uint16_t *tmp_types = 0; + struct json_object_element_s *field = json->start; + function_t fun; + struct json_array_s *instrs_json; + struct json_array_s *args_json = 0; + struct json_value_s *ret_tp = 0; + while(field) + { + if(strcmp(field->name->string, "name") == 0) + { + const char *str = json_value_as_string(field->value)->string; + /* printf("parsing function %s\n", str); */ + char *fun_nm = malloc(sizeof(char) * (1 + strlen(str))); + fun.name = strcpy(fun_nm, str); + } else if(strcmp(field->name->string, "instrs") == 0) + { + num_instrs = json_value_as_array(field->value)->length; + instrs_json = json_value_as_array(field->value); + } else if(strcmp(field->name->string, "args") == 0) + { + args_json = json_value_as_array(field->value); + num_args = args_json->length; + } else if(strcmp(field->name->string, "type") == 0) + { + ret_tp = field->value; + } + field = field->next; + } + tmp_types = malloc(sizeof(uint16_t) * (num_args + num_instrs)); + fun.ret_tp = type_of_json_value(ret_tp); + fun.arg_types = malloc(sizeof(uint16_t) * num_args); + 
fun.num_args = 0; + if(args_json) + { + struct json_array_element_s *arg = args_json->start; + size_t argidx = 0; + while(arg) + { + struct json_object_element_s *a = json_value_as_object(arg->value)->start; + int16_t alias = num_temps; + while(a) + { + const char *str = a->name->string; + if(strcmp(str, "name") == 0) + { + hashmap_set(tmp_map, &(hashdat) + {.str = json_value_as_string(a->value)->string, + .num = num_temps++}); + } else if(strcmp(str, "type") == 0) + { + uint16_t tp = type_of_json_value(a->value); + tmp_types[alias] = tp; + fun.arg_types[argidx] = tp; + } + a = a->next; + } + arg = arg->next; + ++argidx; + } + fun.num_args = argidx; + } + size_t num_words; + fun.insns = parse_instructions(instrs_json, fun_name_to_idx, + tmp_map, &num_temps, tmp_types, &num_words); + fun.num_insns = num_words; + fun.num_tmps = num_temps; + return fun; +} + + +program_t *parse_program(struct json_object_s *json) +{ + struct json_array_s *json_funcs = json->start->value->payload; + size_t num_funcs = json_funcs->length; + program_t *prog = malloc(sizeof(program_t) + sizeof(function_t) * num_funcs); + struct json_array_element_s *json_fun = json_funcs->start; + struct hashmap *fun_name_to_idx = MAKE_HASH_MAP; + for(uint16_t i = 0; i < num_funcs; ++i) + { + struct json_object_element_s *field = + json_value_as_object(json_fun->value)->start; + while(field) + { + if(strcmp(field->name->string, "name") == 0) + { + const char *nm = json_value_as_string(field->value)->string; + hashmap_set(fun_name_to_idx, &(hashdat){.str = nm, .num = i}); + } + field = field->next; + } + json_fun = json_fun->next; + } + json_fun = json_funcs->start; + for(size_t i = 0; i < num_funcs; ++i) + { + prog->funcs[i] = parse_function(json_value_as_object(json_fun->value), + fun_name_to_idx); + json_fun = json_fun->next; + } + prog->num_funcs = num_funcs; + hashmap_free(fun_name_to_idx); + return prog; +} diff --git a/fastbril/src/parser.h b/fastbril/src/parser.h new file mode 100644 index 
/**
 * writes '@' followed by fun_name to stream; if fun_name contains an
 * underscore, only the part before the last underscore is written.
 */
void format_fun_name(FILE *stream, const char *fun_name)
{
  const char *cut = strrchr(fun_name, '_');
  /* number of leading characters to keep */
  size_t keep = cut ? (size_t) (cut - fun_name) : strlen(fun_name);

  putc('@', stream);
  fwrite(fun_name, 1, keep, stream);
}
insns[idx].norm_insn.dest, + opcode_to_string(get_opcode(insns[idx])), + insns[idx].norm_insn.arg1, insns[idx].norm_insn.arg2); + break; + case NOT: + case ID: + case ALLOC: + case LOAD: + fprintf(stream, " t%d = %s t%d;\n", insns[idx].norm_insn.dest, + opcode_to_string(get_opcode(insns[idx])), + insns[idx].norm_insn.arg1); + break; + case JMP: + fprintf(stream, " jmp .L%d;\n", insns[idx].norm_insn.dest); + break; + case BR: + fprintf(stream, " br t%d .L%d .L%d;\n", insns[idx].br_inst.test, + insns[idx].br_inst.ltrue, insns[idx].br_inst.lfalse); + break; + case CALL: + { + const function_t *target = &prog->funcs[insns[idx].call_inst.target]; + if(target->ret_tp != BRILVOID) + { + fprintf(stream, " t%d :", insns[idx].call_inst.dest); + format_type(stream, target->ret_tp); + fprintf(stream, " = call "); + format_fun_name(stream, target->name); + } + else + { + fprintf(stream, " call "); + format_fun_name(stream, target->name); + } + uint16_t *args = (uint16_t*) (insns + idx + 1); + for(size_t i = 0; i < insns[idx].call_inst.num_args; ++i) + fprintf(stream, " t%d", args[i]); + fprintf(stream, ";\n"); + return idx + 1 + (insns[idx].call_inst.num_args + 3) / 4; + } + case RET: + if(insns[idx].norm_insn.arg1 == 0xffff) + fprintf(stream, " ret;\n"); + else + fprintf(stream, " ret t%d;\n", insns[idx].norm_insn.arg1); + break; + case PRINT: + fprintf(stream, " print"); + uint16_t *args = (uint16_t*) &insns[idx].print_insn.arg1; + for(size_t i = 0; i < insns[idx].print_insn.num_prints; ++i) + fprintf(stream, " t%d", args[2 * i]); + fprintf(stream, ";\n"); + return idx + 1 + insns[idx].print_insn.num_prints / 2; + case LCONST: + fprintf(stream, " t%d = const ", insns[idx].long_const_insn.dest); + switch(insns[idx].long_const_insn.type) + { + case BRILINT: + fprintf(stream, "%ld;\n", insns[idx + 1].const_ext.int_val); + break; + case BRILFLOAT: + fprintf(stream, "%f;\n", insns[idx + 1].const_ext.float_val); + break; + case BRILBOOL: + fprintf(stream, "%s;\n", + insns[idx + 
1].const_ext.int_val ? "true" : "false"); + } + return idx + 2; + case PHI: + fprintf(stream, " t%d = phi", insns[idx].phi_inst.dest); + uint16_t *phi_ext = (uint16_t*) (insns + idx + 1); + for(size_t i = 0; i < insns[idx].phi_inst.num_choices; ++i) + fprintf(stream, " t%d .L%d", phi_ext[2 * i + 1], phi_ext[2 * i]); + fprintf(stream, ";\n"); + return idx + 1 + (insns[idx].phi_inst.num_choices + 1) / 2; + case STORE: + fprintf(stream, " store t%d t%d;\n", insns[idx].norm_insn.arg1, + insns[idx].norm_insn.arg2); + break; + case FREE: + fprintf(stream, " free t%d;\n", insns[idx].norm_insn.arg1); + break; + } + return idx + 1; +} + +/** + * formats the header of fun to stream + */ +void format_fun_header(FILE *stream, const function_t *fun) +{ + fprintf(stream, "@%s(", fun->name); + for(size_t a = 0; a < fun->num_args; ++a) + { + if(a != 0) + fprintf(stream, ", "); + fprintf(stream, "t%ld :", a); + format_type(stream, fun->arg_types[a]); + } + putc(')', stream); + if(fun->ret_tp != BRILVOID) + { + fprintf(stream, " :"); + format_type(stream, fun->ret_tp); + } + putc('\n', stream); +} + + +void format_program(FILE *stream, program_t *prog) +{ + for(size_t f = 0; f < prog->num_funcs; ++f) + { + format_fun_header(stream, prog->funcs + f); + fprintf(stream, " {\n"); + size_t idx = 0; + while(idx < prog->funcs[f].num_insns) + idx = format_insn(stream, prog, prog->funcs[f].insns, idx); + fprintf(stream, " }\n\n"); + } +} diff --git a/fastbril/src/pretty-printer.h b/fastbril/src/pretty-printer.h new file mode 100644 index 000000000..a82376fa6 --- /dev/null +++ b/fastbril/src/pretty-printer.h @@ -0,0 +1,17 @@ +#ifndef PRETTY_PRINTER_H +#define PRETTY_PRINTER_H +#include "bril-insns/instrs.h" +#include + +/** + * formats the instruction insns[idx] to stream. 
#!/usr/bin/env -S awk -f

# For every non-empty input line, print a C "#define" built from the line's
# first two fields.
$0 != "" {
    print "#define " $1 " " $2
}