From aa8ed388d58b8dfc0bc1fb104a5deaefa0a1f221 Mon Sep 17 00:00:00 2001 From: Samuel Huang Date: Thu, 7 Dec 2023 23:44:05 -0500 Subject: [PATCH] Code Generator * Modify typechecker test cases to cover return type error * Improve a boolean expression * Add support for function rtype and prototype checking * Condense Makefile * Create scratch.c and scratch.h * Create label.c and label.h * Create codegen.c and codegen.h * Protect type_print() from null pointer * Implement decl_codegen() * Implement symbol_codegen() * Implement stmt_codegen() * Implement expr_codegen() * Format library.c * Add --codegen option in bminor.c * Add codegen_test * Create codegen test cases --- Makefile | 20 +-- bminor.c | 3 + codegen.c | 18 +++ codegen.h | 9 ++ decl.c | 168 ++++++++++++++++++++- decl.h | 1 + encoder.c | 8 +- expr.c | 267 +++++++++++++++++++++++++++++++++- expr.h | 6 + label.c | 55 +++++++ label.h | 17 +++ library.c | 25 ++-- runtest.sh | 16 ++ scratch.c | 54 +++++++ scratch.h | 16 ++ stmt.c | 139 +++++++++++++++++- stmt.h | 1 + symbol.c | 26 ++++ symbol.h | 5 +- test/codegen/good00.bminor | 3 + test/codegen/good01.bminor | 3 + test/codegen/good02.bminor | 3 + test/codegen/good03.bminor | 4 + test/codegen/good04.bminor | 5 + test/codegen/good05.bminor | 5 + test/codegen/good06.bminor | 6 + test/codegen/good07.bminor | 3 + test/codegen/good08.bminor | 5 + test/codegen/good09.bminor | 5 + test/codegen/good10.bminor | 6 + test/codegen/good11.bminor | 8 + test/codegen/good12.bminor | 8 + test/codegen/good13.bminor | 8 + test/codegen/good14.bminor | 8 + test/codegen/good15.bminor | 3 + test/codegen/good16.bminor | 3 + test/codegen/good17.bminor | 3 + test/codegen/good18.bminor | 4 + test/codegen/good19.bminor | 5 + test/typechecker/bad1.bminor | 2 +- test/typechecker/bad3.bminor | 1 + test/typechecker/good0.bminor | 8 +- type.c | 2 +- 43 files changed, 917 insertions(+), 48 deletions(-) create mode 100644 codegen.c create mode 100644 codegen.h create mode 100644 label.c create 
mode 100644 label.h create mode 100644 scratch.c create mode 100644 scratch.h create mode 100644 test/codegen/good00.bminor create mode 100644 test/codegen/good01.bminor create mode 100644 test/codegen/good02.bminor create mode 100644 test/codegen/good03.bminor create mode 100644 test/codegen/good04.bminor create mode 100644 test/codegen/good05.bminor create mode 100644 test/codegen/good06.bminor create mode 100644 test/codegen/good07.bminor create mode 100644 test/codegen/good08.bminor create mode 100644 test/codegen/good09.bminor create mode 100644 test/codegen/good10.bminor create mode 100644 test/codegen/good11.bminor create mode 100644 test/codegen/good12.bminor create mode 100644 test/codegen/good13.bminor create mode 100644 test/codegen/good14.bminor create mode 100644 test/codegen/good15.bminor create mode 100644 test/codegen/good16.bminor create mode 100644 test/codegen/good17.bminor create mode 100644 test/codegen/good18.bminor create mode 100644 test/codegen/good19.bminor diff --git a/Makefile b/Makefile index 66de01a..0014067 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,13 @@ -CFLAGS = -MODS = encoder scanner parser printer resolver typechecker -SRCS = bminor.c $(addsuffix .c, $(MODS)) \ - decl.c stmt.c expr.c type.c param_list.c \ - symbol.c scope.c hash_table.c \ - lex.yy.c grammar.tab.c +CFLAGS = -Wall -Wextra -g +MODS = encoder scanner parser printer resolver typechecker codegen +SRCS = $(shell find . 
-maxdepth 1 -type f -name "*.c") OBJS = $(SRCS:.c=.o) -bminor: $(OBJS) +bminor: grammar.tab.o lex.yy.o $(OBJS) gcc $(CFLAGS) $^ -o $@ -scanner.o: scanner.c token.h - gcc $(CFLAGS) -c $< -o $@ +lex.yy.o: lex.yy.c + gcc -c $< -o $@ # suppress warnings %.o: %.c gcc $(CFLAGS) -c $< -o $@ @@ -18,7 +15,7 @@ scanner.o: scanner.c token.h lex.yy.c: lex.yy.l flex $< -grammar.tab.c token.h: grammar.y lex.yy.c +grammar.tab.c token.h: grammar.y bison --defines=token.h $< # Tests @@ -30,8 +27,7 @@ test-%: bminor clean-test: rm -f ./test/*/*.bminor.out -clean: - rm -f ./test/*/*.bminor.out +clean: clean-test rm -f lex.yy.c rm -f token.h grammar.tab.c grammar.output rm -f *.o diff --git a/bminor.c b/bminor.c index bdf0288..6e72a6a 100644 --- a/bminor.c +++ b/bminor.c @@ -8,6 +8,7 @@ #include "printer.h" #include "resolver.h" #include "typechecker.h" +#include "codegen.h" void usage(int exit_code) { @@ -69,6 +70,8 @@ int main(int argc, char* argv[]) return resolve(d); else if (strcmp(option, "--typecheck") == 0) return typecheck(d); + else if (strcmp(option, "--codegen") == 0) + return codegen(d); else { fprintf(stderr, "Unknown option '%s'\n", option); diff --git a/codegen.c b/codegen.c new file mode 100644 index 0000000..ecbb807 --- /dev/null +++ b/codegen.c @@ -0,0 +1,18 @@ +#include +#include "typechecker.h" +#include "codegen.h" + +int codegen_errors = 0; + +int codegen(struct decl* d) +{ + int type_errors = typecheck(d); // name resolution and type checking + if (type_errors) + { + fprintf(stderr, "CodeGen Error | Type errors occurred, aborting code generation\n"); + return type_errors; + } + + decl_codegen(d); + return codegen_errors; +} \ No newline at end of file diff --git a/codegen.h b/codegen.h new file mode 100644 index 0000000..257b6a2 --- /dev/null +++ b/codegen.h @@ -0,0 +1,9 @@ +#ifndef CODEGEN_H +#define CODEGEN_H + +#include "decl.h" + +/** @return number of errors */ +int codegen(struct decl* d); + +#endif //CODEGEN_H diff --git a/decl.c b/decl.c index 
82c1257..57f9823 100644 --- a/decl.c +++ b/decl.c @@ -1,6 +1,7 @@ #include #include +#include "encoder.h" #include "stmt.h" #include "expr.h" #include "decl.h" @@ -8,6 +9,7 @@ #include "param_list.h" #include "symbol.h" #include "scope.h" +#include "scratch.h" struct decl* decl_create(char* name, struct type* type, struct expr* value, struct stmt* code, struct decl* next) { @@ -98,12 +100,16 @@ void decl_resolve(struct decl* d) expr_resolve(d->value); if (d->code) { - d->symbol->prototype = 1; + d->symbol->prototype = 0; scope_enter(); param_list_resolve(d->type->params); stmt_resolve(d->code); scope_exit(); } + else + d->symbol->prototype = 1; + + d->symbol->locals = cur_local; decl_resolve(d->next); } @@ -159,10 +165,29 @@ void type_typecheck(const struct type* t, const char* name) } } +struct type* rtype = NULL; + void decl_typecheck(struct decl* d) { if (!d) return; + // Global declarations must be constant + if (d->symbol->kind == SYMBOL_GLOBAL && !expr_is_constant(d->value)) + { + printf("Type Error | global variable ('%s') cannot be initialized " + "with non-constant expression ", d->name); + if (d->value && d->value->kind == EXPR_LIST) + { + printf("{"); + expr_print(d->value); + printf("}"); + } + else + expr_print(d->value); + printf("\n"); + type_errors++; + } + type_typecheck(d->type, d->name); struct type* val_type = expr_typecheck(d->value); if (d->value) @@ -180,10 +205,8 @@ void decl_typecheck(struct decl* d) // Check initializer type struct expr* e = d->value; - int len = 0; while (e) { - len++; struct type* t = expr_typecheck(e->left); if (!type_equals(d->type->subtype, t)) { @@ -212,12 +235,145 @@ void decl_typecheck(struct decl* d) } } - if (d->type->kind == TYPE_FUNCTION && d->symbol->kind == SYMBOL_LOCAL) + if (d->type->kind == TYPE_FUNCTION) { - printf("Type Error | cannot declare function ('%s') inside function\n", d->name); - type_errors++; + rtype = d->type->subtype; + if (d->symbol->kind == SYMBOL_LOCAL) + { + printf("Type Error | cannot 
declare function ('%s') inside function\n", d->name);
+            type_errors++;
+        }
     }
 
     stmt_typecheck(d->code);
     decl_typecheck(d->next);
 }
+
+/** Pass current function name to stmt_codegen */
+const char* cur_func;
+
+/** Emits one function definition: prologue, body, then the shared epilogue.
+ *  A prototype has no body to emit, so it produces no output. */
+void decl_codegen_func(struct decl* d)
+{
+    if (d->symbol->prototype) return;
+
+    // Prologue
+    printf(".text\n");
+    printf(".global %s\n", d->name);
+    printf("%s:\n", d->name);
+
+    // Save old base ptr and set new base ptr
+    printf("pushq %%rbp\n");
+    printf("movq %%rsp, %%rbp\n");
+
+    // Allocate space for local variables, all word size
+    printf("subq $%d, %%rsp\n", d->symbol->locals * 8);
+
+    // Push callee-saved registers
+    printf("pushq %%rbx\n");
+    printf("pushq %%r12\n");
+    printf("pushq %%r13\n");
+    printf("pushq %%r14\n");
+    printf("pushq %%r15\n");
+
+    cur_func = d->name;
+    stmt_codegen(d->code);
+
+    // Epilogue
+    printf(".%s_epilogue:\n", d->name);
+
+    // Restore callee-saved registers
+    printf("popq %%r15\n");
+    printf("popq %%r14\n");
+    printf("popq %%r13\n");
+    printf("popq %%r12\n");
+    printf("popq %%rbx\n");
+
+    // Restore old base ptr
+    printf("movq %%rbp, %%rsp\n");
+    printf("popq %%rbp\n");
+    printf("ret\n");
+
+    printf("\n");
+}
+
+/** Generate code for global initializer */
+void decl_codegen_val(const struct expr* v)
+{
+    if (!v)
+    {
+        printf(" .quad 0\n");
+        return;
+    }
+    printf(" ");
+    char es[MAX_STRING_LEN * 5 + 2];
+
+    switch (v->kind)
+    {
+    case EXPR_NEG:
+        // FIXME: handle global negative value
+        printf(".quad -");
+        expr_print(v->right);
+        printf("\n");
+        break;
+    case EXPR_INTEGER_LITERAL:
+    case EXPR_BOOLEAN_LITERAL:
+        printf(".quad %d\n", v->integer_literal);
+        break;
+    case EXPR_FLOAT_LITERAL:
+        printf(".float %f\n", v->float_literal);
+        break;
+    case EXPR_CHAR_LITERAL:
+        printf(".quad %d\n", v->char_literal);
+        break; // Must not fall through: a char node has no string to encode
+    case EXPR_STRING_LITERAL:
+        string_encode(v->string_literal, es);
+        printf(".string %s\n", es);
+        break;
+    case EXPR_LIST:
+        decl_codegen_val(v->left);
+        if (v->right)
+            decl_codegen_val(v->right);
+        break;
+    default:
+        // Invalid initializer
+        break;
+    }
+}
+
+/** Define global variables */
+void decl_codegen_var(struct decl* d)
+{
+    printf(".data\n");
+    printf(".global %s\n", d->name);
+    printf("%s:\n", d->name);
+
+    decl_codegen_val(d->value);
+    printf("\n");
+}
+
+/** Generates code for every declaration in the list starting at d */
+void decl_codegen(struct decl* d)
+{
+    if (!d) return;
+    switch (d->symbol->kind)
+    {
+    case SYMBOL_GLOBAL:
+        d->type->kind == TYPE_FUNCTION ? decl_codegen_func(d) : decl_codegen_var(d);
+        break;
+    case SYMBOL_LOCAL:
+        if (d->value)
+        {
+            expr_codegen(d->value);
+            printf("movq %s, -%d(%%rbp)\n", scratch_name(d->value->reg), (d->symbol->which + 1) * 8);
+            scratch_free(d->value->reg);
+        }
+        // Otherwise do nothing
+        break;
+    case SYMBOL_PARAM:
+        // Do nothing
+        break;
+    }
+    decl_codegen(d->next);
+}
diff --git a/decl.h b/decl.h
index 85becd2..c9b4359 100755
--- a/decl.h
+++ b/decl.h
@@ -15,5 +15,6 @@ struct decl* decl_create(char* name, struct type* type, struct expr* value, stru
 void decl_print(struct decl* d, int indent);
 void decl_resolve(struct decl* d);
 void decl_typecheck(struct decl* d);
+void decl_codegen(struct decl* d);
 
 #endif
diff --git a/encoder.c b/encoder.c
index 12d8df3..f27d405 100644
--- a/encoder.c
+++ b/encoder.c
@@ -5,9 +5,9 @@
 
 int is_hex(char c)
 {
-    return c >= '0' && c <= '9' ||
-        c >= 'A' && c <= 'F' ||
-        c >= 'a' && c <= 'f';
+    return (c >= '0' && c <= '9') ||
+        (c >= 'A' && c <= 'F') ||
+        (c >= 'a' && c <= 'f');
 }
 
 int string_decode(const char* es, char* s)
@@ -202,7 +202,7 @@ int decode(FILE* fp)
 {
     // Find the size of the file
     fseek(fp, 0, SEEK_END);
-    long file_size = ftell(fp);
+    unsigned long file_size = ftell(fp);
     if (file_size > (MAX_STRING_LEN * 5 + 2) * sizeof(char))
     {
         fprintf(stderr, "Invalid string: too long\n");
diff --git a/expr.c b/expr.c
index c9288fe..f9bc6ac 100644
--- a/expr.c
+++ b/expr.c
@@ -8,6 +8,8 @@
 #include "type.h"
 #include "scope.h"
 #include "symbol.h"
+#include "scratch.h"
+#include "label.h"
 
 /* Creating binary nodes by
default */
@@ -253,6 +255,7 @@ void expr_print_leaf(const struct expr* e)
 
 void expr_print_list(const struct expr* e)
 {
+    if(!e) return;
     expr_print(e->left);
     if (e->right != NULL)
     {
@@ -263,6 +266,7 @@ void expr_print_list(const struct expr* e)
 
 void expr_print(const struct expr* e)
 {
+    if (!e) return;
     switch (e->kind)
     {
     /* Leaf nodes */
@@ -397,8 +401,8 @@ struct type* expr_typecheck(const struct expr* e)
     case EXPR_MOD:
     case EXPR_DIV:
     case EXPR_EXP:
-        if (lt->kind != rt->kind ||
-            lt->kind != TYPE_INTEGER && lt->kind != TYPE_FLOAT)
+        if ((lt->kind != rt->kind) ||
+            (lt->kind != TYPE_INTEGER && lt->kind != TYPE_FLOAT))
             type_error_msg(e, lt, rt);
         return lt;
 
@@ -411,8 +415,8 @@ struct type* expr_typecheck(const struct expr* e)
     case EXPR_EQ:
     case EXPR_NEQ:
         if (lt->kind != rt->kind ||
-            lt->kind != TYPE_INTEGER && lt->kind != TYPE_FLOAT &&
-            lt->kind != TYPE_CHAR && lt->kind != TYPE_BOOLEAN)
+            (lt->kind != TYPE_INTEGER && lt->kind != TYPE_FLOAT &&
+            lt->kind != TYPE_CHAR && lt->kind != TYPE_BOOLEAN))
             type_error_msg(e, lt, rt);
         return type_create(TYPE_BOOLEAN);
 
@@ -479,3 +483,258 @@ struct type* expr_typecheck(const struct expr* e)
     }
     return NULL;
 }
+
+int expr_is_constant(const struct expr* e)
+{
+    if (!e) return 1;
+    switch (e->kind)
+    {
+    case EXPR_INTEGER_LITERAL:
+    case EXPR_FLOAT_LITERAL:
+    case EXPR_BOOLEAN_LITERAL:
+    case EXPR_CHAR_LITERAL:
+    case EXPR_STRING_LITERAL:
+    case EXPR_NAME:
+        return 1;
+    case EXPR_NEG:
+        return expr_is_constant(e->left);
+    case EXPR_LIST:
+        return expr_is_constant(e->left) && expr_is_constant(e->right);
+    default:
+        return 0;
+    }
+}
+
+/* Codegen */
+
+extern int codegen_errors;
+
+/** @return the key op associated with the expression */
+static char* expr_codegen_op(expr_t kind)
+{
+    switch (kind)
+    {
+    /* Arithmetic operators */
+    case EXPR_ADD:
+        return "addq";
+    case EXPR_SUB:
+        return "subq";
+    case EXPR_MUL:
+        return "imulq";
+    case EXPR_DIV:
+    case EXPR_MOD:
+        return "idivq";
+
+    /* Comparison operators */
+    case EXPR_GT:
+        return "setg";
+    case EXPR_GEQ:
+        return "setge";
+    case EXPR_LT:
+        return "setl";
+    case EXPR_LEQ:
+        return "setle";
+    case EXPR_EQ:
+        return "sete";
+    case EXPR_NEQ:
+        return "setne";
+
+    /* Logical operators */
+    case EXPR_AND:
+        return "andq";
+    case EXPR_OR:
+        return "orq";
+
+    /* Other binary operators */
+    case EXPR_CALL:
+        return "call";
+    case EXPR_INDEX:
+    case EXPR_ASSIGN:
+        return "movq";
+
+    /* Unary operators */
+    case EXPR_NEG:
+        return "negq";
+    case EXPR_NOT:
+        return "xorq";
+    case EXPR_INCREMENT:
+        return "incq";
+    case EXPR_DECREMENT:
+        return "decq";
+
+    default:
+        return NULL; // Handled elsewhere
+    }
+}
+
+/** Emits code that evaluates e and leaves the result in scratch register
+ *  e->reg; the right operand's register is released before returning. */
+void expr_codegen(struct expr* e)
+{
+    if (!e) return;
+    expr_codegen(e->left);
+    // Delay pushing args for func call
+    if (e->kind != EXPR_CALL)
+        expr_codegen(e->right);
+    // Allocate reg if leaf
+    e->reg = e->left ? e->left->reg : scratch_alloc();
+
+    printf("# ");
+    expr_print(e);
+    printf("\n");
+
+    char* op;
+    struct expr* arg;
+    int arg_count;
+
+    switch (e->kind)
+    {
+    /* Literals */
+    case EXPR_INTEGER_LITERAL:
+    case EXPR_BOOLEAN_LITERAL:
+        printf("movq $%d, %s\n", e->integer_literal, scratch_name(e->reg));
+        break;
+    case EXPR_FLOAT_LITERAL:
+        fprintf(stderr, "CodeGen Error | float literals not supported\n");
+        codegen_errors++;
+        break;
+    case EXPR_CHAR_LITERAL:
+        printf("movq $%d, %s\n", e->char_literal, scratch_name(e->reg));
+        break;
+    case EXPR_STRING_LITERAL:
+        string_create(e->string_literal, e->reg);
+        break;
+
+    /* Arithmetic operators */
+    case EXPR_ADD:
+    case EXPR_SUB:
+        printf("%s %s, %s\n", expr_codegen_op(e->kind),
+               scratch_name(e->right->reg), scratch_name(e->reg));
+        break;
+    case EXPR_MUL:
+    case EXPR_DIV:
+    case EXPR_MOD:
+        printf("movq %s, %%rax\n", scratch_name(e->reg));
+        if (e->kind == EXPR_DIV || e->kind == EXPR_MOD)
+            printf("cqo\n");
+        printf("%s %s\n", expr_codegen_op(e->kind), scratch_name(e->right->reg));
+        printf("movq %s, %s\n",
+               e->kind == EXPR_MOD ? "%rdx" : "%rax",
+               scratch_name(e->reg));
+        break;
+    case EXPR_EXP:
+        // TODO: call runtime library exp
+        fprintf(stderr, "CodeGen Error | exponentiation not supported\n");
+        codegen_errors++; // Report it instead of silently emitting nothing
+        break;
+
+    /* Unary operators */
+    case EXPR_NEG:
+    case EXPR_INCREMENT:
+    case EXPR_DECREMENT:
+        printf("%s %s\n", expr_codegen_op(e->kind), scratch_name(e->reg));
+        break;
+    case EXPR_NOT:
+        printf("xorq $1, %s\n", scratch_name(e->reg));
+        break;
+
+    /* Comparison and logical operators */
+    case EXPR_GT:
+    case EXPR_GEQ:
+    case EXPR_LT:
+    case EXPR_LEQ:
+    case EXPR_EQ:
+    case EXPR_NEQ:
+        // AT&T order: `cmpq b, a` sets flags for a - b, so the left operand goes last
+        printf("cmpq %s, %s\n",
+               scratch_name(e->right->reg), scratch_name(e->reg));
+        printf("%s %%al\n", expr_codegen_op(e->kind)); // Set lower 8 bits
+        printf("movzbq %%al, %s\n", scratch_name(e->reg)); // Zero-extend to 0/1
+        break;
+
+    case EXPR_AND:
+    case EXPR_OR:
+        // Normalize both operands to 0/1 through %al, then combine them
+        printf("cmpq $0, %s\n", scratch_name(e->reg));
+        printf("setne %%al\n");
+        printf("movzbq %%al, %s\n", scratch_name(e->reg));
+        printf("cmpq $0, %s\n", scratch_name(e->right->reg));
+        printf("setne %%al\n");
+        printf("movzbq %%al, %s\n", scratch_name(e->right->reg));
+        printf("%s %s, %s\n", expr_codegen_op(e->kind),
+               scratch_name(e->right->reg), scratch_name(e->reg));
+        break;
+
+    /* Other binary operators */
+    case EXPR_ASSIGN:
+        printf("movq %s, %s\n",
+               scratch_name(e->right->reg),
+               symbol_codegen(e->left->symbol));
+        scratch_free(e->reg);
+        e->reg = e->right->reg; // Side effect
+        e->right->reg = -1; // Don't free it
+        break;
+    case EXPR_INDEX:
+        e->reg = scratch_alloc(); // 3rd scratch needed
+        // Always word size
+        printf("movq (%s, %s, 8), %s\n",
+               scratch_name(e->left->reg),
+               scratch_name(e->right->reg),
+               scratch_name(e->reg));
+        scratch_free(e->left->reg);
+        break;
+    case EXPR_CALL:
+        // e->reg already owns the register taken by the callee name node; reuse it
+
+        // Push caller-saved registers
+        printf("pushq %%r10\n");
+        printf("pushq %%r11\n");
+
+        expr_codegen(e->right); // Push arguments by codegen
+        printf("call %s\n", e->left->name); // Make function call
+        // Stack frame is set by callee
+        // Recover stack
+        arg_count = 0;
+        arg = e->right;
+        while (arg && arg->left)
+        {
+            arg_count++;
+            arg = arg->right;
+        }
+        printf("addq $%d, %%rsp\n", arg_count * 8);
+
+        // Pop caller-saved registers
+        printf("popq %%r11\n");
+        printf("popq %%r10\n");
+
+        printf("movq %%rax, %s\n", scratch_name(e->reg)); // Return value
+        break;
+
+    /* Other nodes */
+    case EXPR_LIST:
+        // Already at the base condition of recursion, the last arg
+        printf("pushq %s\n", scratch_name(e->reg)); // Left child already stored
+        scratch_free(e->reg); // Not needed anymore
+        break;
+    case EXPR_NAME:
+        switch (e->symbol->type->kind)
+        {
+        case TYPE_ARRAY:
+        case TYPE_STRING:
+            op = "leaq"; // Load address
+            break;
+        case TYPE_FUNCTION:
+            // Do nothing. Handled by function call.
+            return;
+        default:
+            op = "movq"; // Load value
+            break;
+        }
+
+        printf("%s %s, %s\n",
+               op, symbol_codegen(e->symbol), scratch_name(e->reg));
+        break;
+    }
+
+    if (e->right) scratch_free(e->right->reg);
+}
diff --git a/expr.h b/expr.h
index 2929bdb..4c3ee44 100755
--- a/expr.h
+++ b/expr.h
@@ -50,6 +50,7 @@ struct expr
     expr_t kind;
     struct expr* left;
     struct expr* right;
+    int reg; // Used by codegen
 
     /* Used by various leaf expressions */
     struct symbol* symbol;
@@ -84,4 +85,9 @@ void expr_resolve(struct expr* e);
  */
 struct type* expr_typecheck(const struct expr* e);
 
+/** @return 1 if the expression is constant, 0 otherwise */
+int expr_is_constant(const struct expr* e);
+
+void expr_codegen(struct expr* e);
+
 #endif
diff --git a/label.c b/label.c
new file mode 100644
index 0000000..f5a967f
--- /dev/null
+++ b/label.c
@@ -0,0 +1,55 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "label.h"
+#include "scratch.h"
+
+#define LABEL_LENGTH 8
+#define MAX_LABEL 99999
+static char name[LABEL_LENGTH];
+
+extern int codegen_errors;
+
+static int label_count = 0;
+
+int label_create()
+{
+    if (label_count++ == MAX_LABEL)
+    {
+        fprintf(stderr, "CodeGen Error | Maximum number of labels reached\n");
+        codegen_errors++;
+        exit(EXIT_FAILURE);
+    }
+    return label_count;
+}
+
+char* label_name(int index)
+{
+    snprintf(name, sizeof(name), ".L%d", index); // Bounded by the static buffer
+    return name;
+}
+
+static int string_count = 0;
+
+/** @return a static string representation of the string label, e.g. .S1 */
+char* string_name(int index)
+{
+    snprintf(name, sizeof(name), ".S%d", index); // Bounded by the static buffer
+    return name;
+}
+
+int string_create(const char* string, int reg)
+{
+    if (string_count++ == MAX_LABEL)
+    {
+        fprintf(stderr, "CodeGen Error | Maximum number of strings reached\n");
+        codegen_errors++;
+        exit(EXIT_FAILURE);
+    }
+    // NOTE(review): string is emitted verbatim; presumably it needs escaping first - confirm
+    printf("\n.data\n");
+    printf("%s: ", string_name(string_count));
+    printf(".string \"%s\"\n", string);
+    printf(".text\n");
+    printf("leaq %s, %s\n", string_name(string_count), scratch_name(reg)); // Newline ends the instruction
+    return string_count;
+}
diff --git a/label.h b/label.h
new file mode 100644
index 0000000..e20f765
--- /dev/null
+++ b/label.h
@@ -0,0 +1,17 @@
+#ifndef LABEL_H
+#define LABEL_H
+
+/** Creates a general purpose private label.
+ * @returns the index of the label
+ * */
+int label_create();
+
+/** @returns a static string representation of the label, e.g. .L1 */
+char* label_name(int index);
+
+/** Creates a private label for local string. The code is also generated.
+ * @return the index of the string label
+ * */
+int string_create(const char* string, int reg);
+
+#endif
diff --git a/library.c b/library.c
index f4574d6..6d9a7c8 100755
--- a/library.c
+++ b/library.c
@@ -25,35 +25,34 @@ x = integer_power(a,b);
 */
 
 #include
-#include
 
-void print_integer( long x )
+void print_integer(long x)
 {
-    printf("%ld",x);
+    printf("%ld", x);
 }
 
-void print_string( const char *s )
+void print_string(const char* s)
 {
-    printf("%s",s);
+    printf("%s", s);
 }
 
-void print_boolean( int b )
+void print_boolean(int b)
 {
-    printf("%s",b?"true":"false");
+    printf("%s", b ? "true" : "false");
 }
 
-void print_character( char c )
+void print_character(char c)
 {
-    printf("%c",c);
+    printf("%c", c);
 }
 
-long integer_power( long x, long y )
+long integer_power(long x, long y)
 {
     long result = 1;
-    while(y>0) {
+    while (y > 0)
+    {
         result = result * x;
-        y = y -1;
+        y = y - 1;
     }
     return result;
 }
-
diff --git a/runtest.sh b/runtest.sh
index 1e73143..46e5054 100755
--- a/runtest.sh
+++ b/runtest.sh
@@ -22,6 +22,18 @@ printer_test () {
     done
 }
 
+codegen_test () {
+    for testfile in ./test/codegen/good*.bminor; do
+        if bminor --codegen "$testfile" > "$testfile.out" &&
+            gcc -x assembler "$testfile.out" -o "$testfile.exe"; then
+            echo "$testfile success (as expected)"
+        else
+            echo "$testfile failure (INCORRECT)"
+        fi
+        rm -f "$testfile.exe"
+    done
+}
+
 case $module in
     "encoder")
         command="encode"
@@ -42,6 +54,10 @@ case $module in
     "typechecker")
         command="typecheck"
         ;;
+    "codegen")
+        codegen_test
+        exit
+        ;;
     *)
         echo "Unknown module: $module"
         exit 1
diff --git a/scratch.c b/scratch.c
new file mode 100644
index 0000000..346a209
--- /dev/null
+++ b/scratch.c
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include "scratch.h"
+
+// %rbx, %r10, %r11, %r12, %r13, %r14, %r15
+int regs[7] = { 0, 0, 0, 0, 0, 0, 0 };
+
+extern int codegen_errors;
+
+int scratch_alloc()
+{
+    int i;
+    for (i = 0; i < 7; i++)
+    {
+        if (regs[i] == 0)
+        {
+            regs[i] = 1;
+            return i;
+        }
+    }
+    fprintf(stderr, "Codegen Error | Ran out of scratch registers\n");
+    codegen_errors++;
+    return -1;
+}
+
+void scratch_free(int r)
+{
+    if (r >=0 && r <= 6)
+        regs[r] = 0;
+}
+
+const char* scratch_name(int r)
+{
+    switch (r)
+    {
+    case 0:
+        return "%rbx";
+    case 1:
+        return "%r10";
+    case 2:
+        return "%r11";
+    case 3:
+        return "%r12";
+    case 4:
+        return "%r13";
+    case 5:
+        return "%r14";
+    case 6:
+        return "%r15";
+    default:
+        fprintf(stderr, "CodeGen Error | Invalid scratch register\n");
+        codegen_errors++;
+        return "%invalid"; // Never NULL: callers pass the result straight to printf("%s")
+    }
+}
diff --git a/scratch.h b/scratch.h
new file mode 100644
index 0000000..fe94b8e
---
/dev/null
+++ b/scratch.h
@@ -0,0 +1,16 @@
+#ifndef SCRATCH_H
+#define SCRATCH_H
+
+/**
+ * Allocates a scratch register. %rbx and %r1{0..5} are used
+ * @return the index of the scratch register, -1 if none are available
+ * */
+int scratch_alloc();
+
+/** Frees a scratch register. */
+void scratch_free(int r);
+
+/** @return the name of a scratch register, with % */
+const char* scratch_name(int r);
+
+#endif
diff --git a/stmt.c b/stmt.c
index ddcc705..b7d63bc 100644
--- a/stmt.c
+++ b/stmt.c
@@ -6,6 +6,8 @@
 #include "decl.h"
 #include "stmt.h"
 #include "scope.h"
+#include "scratch.h"
+#include "label.h"
 
 struct stmt* stmt_create(
     stmt_t kind,
@@ -191,6 +193,8 @@ void stmt_resolve(const struct stmt* s)
 void stmt_typecheck(const struct stmt* s)
 {
     if (!s) return;
+    extern struct type* rtype;
+    struct type* expr_type;
     switch (s->kind)
     {
     case STMT_DECL:
@@ -218,9 +222,146 @@ void stmt_typecheck(const struct stmt* s)
         break;
     case STMT_EXPR:
     case STMT_PRINT:
-    case STMT_RETURN:
         expr_typecheck(s->expr);
         break;
+    case STMT_RETURN:
+        expr_type = expr_typecheck(s->expr);
+        if (!(rtype->kind == TYPE_VOID && expr_type == NULL) &&
+            !type_equals(expr_type, rtype))
+        {
+            printf("Type Error | return type mismatch, expected ");
+            type_print(rtype);
+            printf(", got");
+            if (s->expr)
+            {
+                printf(" ");
+                expr_print(s->expr);
+                printf(" (");
+                type_print(expr_typecheck(s->expr));
+                printf(")\n");
+            }
+            else
+                printf(" nothing\n");
+            type_errors++;
+        }
+        break;
     }
     stmt_typecheck(s->next);
-}
\ No newline at end of file
+}
+
+/** Generates code for print statement */
+void print_codegen(const struct stmt* s)
+{
+    struct expr* e = s->expr;
+    while (e)
+    {
+        expr_codegen(e->left);
+        switch (e->kind)
+        {
+        case EXPR_INTEGER_LITERAL:
+            printf(".data\n");
+            printf(".formatter_d: .string \"%%d\"\n");
+            printf("movq $%d, %%rsi\n", e->integer_literal);
+            printf("leaq .formatter_d(%%rip), %%rdi\n");
+            break;
+        default:
+            // TODO: Others printf formats
+            break;
+        }
+        // NOTE(review): unfinished and not called yet; presumably e is an argument list node - confirm
+        e = e->right; // Advance, otherwise the loop never terminates
+    }
+}
+
+/** Generates code for the statement list starting at s. A return statement
+ *  jumps to the epilogue label of the function named by cur_func. */
+void stmt_codegen(const struct stmt* s)
+{
+    if (!s) return;
+    int top, end; // Labels
+    extern const char* cur_func;
+
+    switch (s->kind)
+    {
+    case STMT_DECL:
+        decl_codegen(s->decl);
+        break;
+    case STMT_EXPR:
+        if (s->expr)
+        {
+            expr_codegen(s->expr);
+            scratch_free(s->expr->reg);
+        }
+        break;
+    case STMT_IF_ELSE:
+        top = label_create();
+        end = label_create();
+
+        printf("# if-else condition\n");
+        if (s->expr)
+        {
+            expr_codegen(s->expr);
+            printf("cmp $0, %s\n", scratch_name(s->expr->reg));
+            scratch_free(s->expr->reg);
+            printf("je %s\n", label_name(top)); // To false
+        }
+
+        printf("# if-else body\n");
+        stmt_codegen(s->body);
+        printf("jmp %s\n", label_name(end)); // To end
+
+        printf("# if-else else body\n");
+        printf("%s:\n", label_name(top)); // False label
+        stmt_codegen(s->else_body);
+
+        printf("%s:\n", label_name(end)); // End label
+        break;
+    case STMT_FOR:
+        top = label_create();
+        end = label_create();
+
+        printf("# for-loop init expr\n");
+        expr_codegen(s->init_expr);
+        if (s->init_expr) scratch_free(s->init_expr->reg); // Value is unused
+
+        printf("%s:\n", label_name(top)); // Loop top
+
+        printf("# for-loop expr\n");
+        if (s->expr)
+        {
+            expr_codegen(s->expr);
+            printf("cmp $0, %s\n", scratch_name(s->expr->reg));
+            scratch_free(s->expr->reg);
+            printf("je %s\n", label_name(end));
+        }
+
+        printf("# for-loop body\n");
+        stmt_codegen(s->body);
+
+        printf("# for-loop next expr\n");
+        expr_codegen(s->next_expr);
+        if (s->next_expr) scratch_free(s->next_expr->reg); // Value is unused
+        printf("jmp %s\n", label_name(top)); // A jump target takes no colon
+
+        printf("%s:\n", label_name(end)); // Loop end
+        break;
+    case STMT_PRINT:
+        // TODO: Implement register calling
+        // TODO: Calling runtime library
+        break;
+    case STMT_RETURN:
+        if (s->expr)
+        {
+            expr_codegen(s->expr);
+            printf("movq %s, %%rax\n", scratch_name(s->expr->reg));
+            scratch_free(s->expr->reg);
+        }
+        printf("jmp .%s_epilogue\n", cur_func);
+        break;
+    case STMT_BLOCK:
+        stmt_codegen(s->body);
+        break;
+    }
+
+    stmt_codegen(s->next);
+}
diff --git a/stmt.h b/stmt.h
index 4b6e40f..b3fdfd9 100755
--- a/stmt.h
+++ b/stmt.h
@@ -39,5 +39,6 @@ struct stmt*
stmt_create_empty(stmt_t kind);
 void stmt_print(const struct stmt* s, int indent);
 void stmt_resolve(const struct stmt* s);
 void stmt_typecheck(const struct stmt* s);;
+void stmt_codegen(const struct stmt* s);
 
 #endif
diff --git a/symbol.c b/symbol.c
index 17014db..e830757 100644
--- a/symbol.c
+++ b/symbol.c
@@ -1,3 +1,4 @@
+#include <stdio.h>
 #include
 #include "type.h"
 #include "symbol.h"
@@ -11,3 +12,35 @@ struct symbol* symbol_create(symbol_t kind, struct type* type, char* name, int w
     s->which = which;
     return s;
 }
+
+/** @return the assembly operand for a symbol: the bare name for a global,
+ *  otherwise an %rbp-relative address held in a static buffer that the
+ *  next call overwrites. */
+const char* symbol_codegen(const struct symbol* s)
+{
+    static char buf[16];
+    int offset;
+    switch (s->kind)
+    {
+    case SYMBOL_LOCAL:
+        // Locals sit below the saved %rbp: -8(%rbp), -16(%rbp), ...
+        offset = -8 * (s->which + 1);
+        break;
+    case SYMBOL_PARAM:
+        // 0(%rbp) holds the saved %rbp and 8(%rbp) the return address, so
+        // the first pushed argument is at 16(%rbp)
+        offset = 8 * (s->which + 2);
+        break;
+    case SYMBOL_GLOBAL:
+    default:
+        return s->name;
+    }
+    int length = snprintf(buf, sizeof(buf), "%d(%%rbp)", offset);
+    if (length < 0 || (size_t)length >= sizeof(buf)) // >= : a full buffer is already truncated
+    {
+        fprintf(stderr, "CodeGen Error | too many parameters, cannot generate symbol\n");
+        extern int codegen_errors;
+        codegen_errors++;
+    }
+    return buf;
+}
diff --git a/symbol.h b/symbol.h
index 80a18db..d0af642 100755
--- a/symbol.h
+++ b/symbol.h
@@ -14,9 +14,12 @@ struct symbol
     struct type* type;
     char* name;
     int which;
-    int prototype;
+    int prototype; // 1 if function prototype, 0 otherwise
+    int locals; // number of local variables
 };
 
 struct symbol* symbol_create(symbol_t kind, struct type* type, char* name, int which);
 
+const char* symbol_codegen(const struct symbol* s);
+
 #endif
diff --git a/test/codegen/good00.bminor b/test/codegen/good00.bminor
new file mode 100644
index 0000000..136ba4a
--- /dev/null
+++ b/test/codegen/good00.bminor
@@ -0,0 +1,3 @@
+/* Integer declarations */
+a: integer;
+main: function integer () = { return 128; }
diff --git a/test/codegen/good01.bminor b/test/codegen/good01.bminor
new file mode 100644
index 0000000..dd832cb
--- /dev/null
+++ b/test/codegen/good01.bminor
@@ -0,0 +1,3 @@
+/* Integer declaration with value */
+a: integer = 128;
+main: function integer ()
= { return 0; } diff --git a/test/codegen/good02.bminor b/test/codegen/good02.bminor new file mode 100644 index 0000000..1f2f0ed --- /dev/null +++ b/test/codegen/good02.bminor @@ -0,0 +1,3 @@ +/* String declaration */ +s: string; +main: function integer () = { return 128; } diff --git a/test/codegen/good03.bminor b/test/codegen/good03.bminor new file mode 100644 index 0000000..81dc8ba --- /dev/null +++ b/test/codegen/good03.bminor @@ -0,0 +1,4 @@ +/* String declaration with value */ +s1: string = "Hello World"; +s2: string = "sam\n"; +main: function integer () = { return 128; } diff --git a/test/codegen/good04.bminor b/test/codegen/good04.bminor new file mode 100644 index 0000000..b8abb75 --- /dev/null +++ b/test/codegen/good04.bminor @@ -0,0 +1,5 @@ +/* Function referencing global var */ +a: integer = 128; +main: function integer () = { + return a; +} diff --git a/test/codegen/good05.bminor b/test/codegen/good05.bminor new file mode 100644 index 0000000..96de5dc --- /dev/null +++ b/test/codegen/good05.bminor @@ -0,0 +1,5 @@ +/* Function referencing local var */ +main: function integer () = { + a: integer = 128; + return a; +} diff --git a/test/codegen/good06.bminor b/test/codegen/good06.bminor new file mode 100644 index 0000000..29fe863 --- /dev/null +++ b/test/codegen/good06.bminor @@ -0,0 +1,6 @@ +/* Arithmetics */ +main: function integer () = { + a: integer = 128; + b: integer = 64; + return (a / b) + 1; // 3 +} diff --git a/test/codegen/good07.bminor b/test/codegen/good07.bminor new file mode 100644 index 0000000..78d509c --- /dev/null +++ b/test/codegen/good07.bminor @@ -0,0 +1,3 @@ +/* Array declaration */ +a: array [4] integer = {0, 1, 2, 3}; +main: function integer () = { return 128; } diff --git a/test/codegen/good08.bminor b/test/codegen/good08.bminor new file mode 100644 index 0000000..d6661da --- /dev/null +++ b/test/codegen/good08.bminor @@ -0,0 +1,5 @@ +/* Indexing */ +a: array [4] integer = {0, 1, 2, 3}; +main: function integer () = { + return a[0] 
+ a[1] + a[2] + a[3]; // 6 +} diff --git a/test/codegen/good09.bminor b/test/codegen/good09.bminor new file mode 100644 index 0000000..32bdb48 --- /dev/null +++ b/test/codegen/good09.bminor @@ -0,0 +1,5 @@ +/* Indexing with expression */ +a: array [4] integer = {0, 1, 2, 3}; +main: function integer () = { + return a[1 + 1 + 1]; // 3 +} diff --git a/test/codegen/good10.bminor b/test/codegen/good10.bminor new file mode 100644 index 0000000..3f3ad4d --- /dev/null +++ b/test/codegen/good10.bminor @@ -0,0 +1,6 @@ +/* Assign to global variable */ +a: integer = 128; +main: function integer () = { + a = a - 1; + return a; +} diff --git a/test/codegen/good11.bminor b/test/codegen/good11.bminor new file mode 100644 index 0000000..f368ce2 --- /dev/null +++ b/test/codegen/good11.bminor @@ -0,0 +1,8 @@ +/* Function call */ +f: function integer () = { + return 128; +} + +main : function integer () = { + return f(); +} diff --git a/test/codegen/good12.bminor b/test/codegen/good12.bminor new file mode 100644 index 0000000..9d7f0a2 --- /dev/null +++ b/test/codegen/good12.bminor @@ -0,0 +1,8 @@ +/* Function call with arg */ +f: function integer (arg : integer) = { + return arg; +} + +main : function integer () = { + return f(0); +} diff --git a/test/codegen/good13.bminor b/test/codegen/good13.bminor new file mode 100644 index 0000000..9b8250a --- /dev/null +++ b/test/codegen/good13.bminor @@ -0,0 +1,8 @@ +/* Function call with args */ +f: function integer (arg1 : integer, arg2 : integer) = { + return arg1 + arg2; +} + +main : function integer () = { + return f(12, 24); // 36 +} diff --git a/test/codegen/good14.bminor b/test/codegen/good14.bminor new file mode 100644 index 0000000..ee8e56b --- /dev/null +++ b/test/codegen/good14.bminor @@ -0,0 +1,8 @@ +/* If-else */ +main : function integer () = { + cond: boolean = true; + if (cond) + return 128; + else + return 127; +} diff --git a/test/codegen/good15.bminor b/test/codegen/good15.bminor new file mode 100644 index 0000000..136ba4a 
--- /dev/null +++ b/test/codegen/good15.bminor @@ -0,0 +1,3 @@ +/* Integer declarations */ +a: integer; +main: function integer () = { return 128; } diff --git a/test/codegen/good16.bminor b/test/codegen/good16.bminor new file mode 100644 index 0000000..dd832cb --- /dev/null +++ b/test/codegen/good16.bminor @@ -0,0 +1,3 @@ +/* Integer declaration with value */ +a: integer = 128; +main: function integer () = { return 0; } diff --git a/test/codegen/good17.bminor b/test/codegen/good17.bminor new file mode 100644 index 0000000..1f2f0ed --- /dev/null +++ b/test/codegen/good17.bminor @@ -0,0 +1,3 @@ +/* String declaration */ +s: string; +main: function integer () = { return 128; } diff --git a/test/codegen/good18.bminor b/test/codegen/good18.bminor new file mode 100644 index 0000000..81dc8ba --- /dev/null +++ b/test/codegen/good18.bminor @@ -0,0 +1,4 @@ +/* String declaration with value */ +s1: string = "Hello World"; +s2: string = "sam\n"; +main: function integer () = { return 128; } diff --git a/test/codegen/good19.bminor b/test/codegen/good19.bminor new file mode 100644 index 0000000..b8abb75 --- /dev/null +++ b/test/codegen/good19.bminor @@ -0,0 +1,5 @@ +/* Function referencing global var */ +a: integer = 128; +main: function integer () = { + return a; +} diff --git a/test/typechecker/bad1.bminor b/test/typechecker/bad1.bminor index 29ed655..d87c034 100644 --- a/test/typechecker/bad1.bminor +++ b/test/typechecker/bad1.bminor @@ -6,5 +6,5 @@ a: function array [1] integer () = { b: function void () = { c: function integer(); - return; + return 1; } diff --git a/test/typechecker/bad3.bminor b/test/typechecker/bad3.bminor index 405aa6d..b633aff 100644 --- a/test/typechecker/bad3.bminor +++ b/test/typechecker/bad3.bminor @@ -19,4 +19,5 @@ main: function void () = { func(); func(c); (func(a) + b) * 2 == (func(c) % 2); + return 0; } diff --git a/test/typechecker/good0.bminor b/test/typechecker/good0.bminor index f6e2eea..a399ae2 100644 --- a/test/typechecker/good0.bminor 
+++ b/test/typechecker/good0.bminor @@ -1,7 +1,7 @@ /* Variable declarations */ -a: integer = a; -b: integer = a; -c: integer = a + b; +a: integer = 0; +b: integer = 1; +c: integer = -1; d: float = 45.67; e: boolean = false; f: char = 'q'; @@ -10,4 +10,4 @@ h: array [1] integer; i: array [2] boolean = {true, false}; j: array [2] array [b] boolean; k: array [b] array [2] boolean; -l: array [2] integer = {a + b, 1 + 3 + c}; +l: array [2] integer = {a, b}; diff --git a/type.c b/type.c index 5100ae7..aec9854 100644 --- a/type.c +++ b/type.c @@ -62,6 +62,7 @@ void type_print_primitive(const struct type* t) void type_print(const struct type* t) { + if (!t) return; switch (t->kind) { case TYPE_ARRAY: @@ -115,4 +116,3 @@ int type_equals(const struct type* t1, const struct type* t2) return 1; } } -