From 03b1de1b138bee4ce6581839df85de5e01db1f1a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:36 +0000 Subject: [PATCH 01/18] sketch out the initial design for the C IR --- compiler/backend/cir.nim | 124 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 compiler/backend/cir.nim diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim new file mode 100644 index 00000000000..90f6679a355 --- /dev/null +++ b/compiler/backend/cir.nim @@ -0,0 +1,124 @@ +## Implements the intermediate representation the C code generator outputs, as +## well as routines for producing and querying it. +## +## The IR is an abstract syntax tree (=AST) representing pre-preprocessor C +## code. For the convenience of the code generator, the tree doesn't encode +## only syntax, but also includes some symbol information. Handling all +## complexities of C's syntax is explicitly a non-goal. Where reasonable, +## simplifications are made. 
+ +import + compiler/mir/[ + mirtrees + ] + +type + CNodeKind* = enum + cnkIdent ## raw identifier + cnkProcSym ## procedure symbol + cnkGlobalSym ## global location symbol + cnkConstSym ## constant location symbol + cnkType ## strong type reference + cnkWeakType ## weak type reference + + cnkCharLit + # C has no signed integer literals, but for convenience, we do + cnkIntLit + cnkUIntLit + cnkFloatLit + cnkDoubleLit + cnkStrLit + cnkVerbatim ## string that's interpreted as raw piece of C code + + cnkExpr ## local reference to an expression AST + + # ------------ end of atoms + + # expressions: + cnkAddrOf ## (operand: expr) + cnkDeref ## (operand: expr) + cnkMember ## (of: expr, member: ident) + cnkPtrMember ## (of: expr, member: ident) + cnkArrMember ## (of: expr, index: expr) + cnkCast ## (typ: typeexpr, opr: expr) + # XXX: it's likely easier/better to use dedicated node kinds for all + # needed operators + cnkPrefix ## (name: ident, a: expr) + cnkInfix ## (name: ident, a: expr, b: expr) + cnkPostfix ## (name: ident, a: expr) + + cnkCall ## (callee: expr, args...: expr) + cnkTernary ## (cond: expr, a: expr, b: expr) + # an assignment is an expression in C + cnkAsgn ## (lhs: expr, rhs: expr) + cnkBraced ## (items...: expr) + + # statements + cnkStmt ## (e: expr) + cnkGoto ## (label: ident) + cnkLabel ## (label: ident) + cnkBlock ## (body+: stmt) + cnkIf ## (cond: expr, body: stmt) + cnkWhile ## (cond: expr, body: stmt) + cnkReturn ## (operand?: expr) + cnkSwitch ## (selector: expr, branch+:case|default) + cnkCase ## (value: expr, body: stmt) + cnkDefault ## (body: stmt) + # TODO: an asm statement is missing + + # declaration grammar: + # doesn't cover everything that C supports, and also has to support macros + cnkSpecList ## (spec+: spec) + cnkDeclaration ## (spec: spec, decl: declarator, init?: expr) + cnkDefinition ## (spec: spec, decl: declarator, body: block) + cnkParamDecl ## (spec: spec, decl: declarator) + + cnkArrayDecl ## (name: ident, len?: expr) + 
cnkPtrDecl ## (name: ident) + cnkFuncDecl ## (name: ident, params...: decl) + cnkDeclList ## (decl...: declaration) + + cnkStructSpec ## (attr?: expr, name?: ident, body: decllist) + cnkUnionSpec ## (attr?: expr, name?: ident, body: decllist) + + # directives: + cnkEmit ## emit(args...: expr|stmt|verbatim) + +const + cnkAtoms = {cnkIdent .. cnkExpr} + cnkWithNodes = {low(CNodeKind) .. high(CNodeKind)} - cnkAtoms + + cnkWithNumber = {cnkIntLit, cnkUIntLit, cnkFloatLit, cnkDoubleLit} + cnkWithString = {cnkStrLit, cnkVerbatim} + cnkWithType = {cnkWeakType, cnkType} + +type + CNodeIndex* = distinct uint32 + CIdentifier* = distinct uint32 + + CNode* = object + ## Node in a flat tree structure. A node is either atomic or not. Atoms + ## have no children nodes. Nodes are layed out in depth first fashion. + case kind*: CNodeKind + of cnkIdent: ident*: uint32 + of cnkProcSym: prc*: ProcedureId + of cnkGlobalSym: global*: GlobalId + of cnkConstSym: cnst*: ConstId + of cnkWithType: typ*: TypeId + of cnkWithString: strId*: StringId + of cnkCharLit: charVal*: char + of cnkWithNumber: number*: NumberId + of cnkExpr: node*: CNodeIndex + of cnkWithNodes: len*: uint32 + + BufferType = enum + btExpr + btStmt + + CAst* = object + ## In-progress AST. Non-atomic expressions are stored in a separate buffer + ## from statements. + buf: array[BufferType, seq[CNode]] + + CombinedCAst* = seq[CNode] + ## Finalized AST where expressions and statement are combined. From 2ba2693b272226b3817727d184200b9753a46001 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:36 +0000 Subject: [PATCH 02/18] implement the CIR formatter There's not much to it. The code could be shortened a bit using templates, but that can happen at a later point. The definition of `CodeGenEnv` is hand-waved into the future. 
--- compiler/backend/cformat.nim | 261 +++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 compiler/backend/cformat.nim diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim new file mode 100644 index 00000000000..57140c90b98 --- /dev/null +++ b/compiler/backend/cformat.nim @@ -0,0 +1,261 @@ +## Implements the formatter that turns the CIR (`cir <#cir>`_) into textual C +## code. + +import + std/[ + tables + ], + compiler/backend/[ + # cgendata, + cir + ], + compiler/mir/[ + mirenv, + mirtrees + ], + compiler/utils/[ + containers, + idioms + ] + +# TODO: move these formatting procedures here (or somewhere else) -- they don't +# have anything to do with options +from compiler/front/options import toCChar, makeCString + +proc format(g: CodeGenEnv, ast: CombinedCAst, + result: var string, i: var int) = + # efficiency matters! This procedure potentially processes enormous amounts + # of data, and should thus be as fast as possible + # TODO: look into structuring `format` such that the C compiler can use tail + # calls + # TODO: indentation is not handled + template recurse() = + format(g, ast, result, i) + + template foreach(n: CNode, body: untyped) = + for i in 0.." + recurse() + of cnkArrMember: + recurse() + result.add "[" + recurse() + result.add "]" + of cnkCast: + result.add "(" + recurse() + result.add ")(" + recurse() + result.add ")" + of cnkInfix: + let sym = ast[i].ident + inc i + recurse() + result.add " " + result.add g.getStr(sym) + result.add " " + recurse() + of cnkPrefix: + recurse() + result.add "(" + recurse() + result.add ")" + of cnkPostfix: + let sym = ast[i].ident + inc i + recurse() + result.add g.getStr(sym) + of cnkAsgn: + recurse() + result.add " = " + recurse() + of cnkCall: + recurse() + result.add "(" + for j in 1.. 1: + result.add ", " + recurse() + result.add ")" + of cnkTernary: + result.add "(" + recurse() + result.add " ? 
" + recurse() + result.add " : " + recurse() + result.add ")" + of cnkBraced: + result.add "{" + for j in 0.. 0: + result.add ", " + recurse() + result.add "}" + + # statements: + of cnkStmt: + recurse() + result.add ";\n" + of cnkGoto: + result.add "goto " + recurse() + result.add ";\n" + of cnkLabel: + recurse() + result.add ":;\n" + of cnkBlock: + result.add "{\n" + foreach(n): + recurse() + result.add "}\n" + of cnkWhile: + result.add "while (" + recurse() + result.add ") " + recurse() + of cnkReturn: + if n.len == 0: + result.add "return;\n" + else: + result.add "return " + recurse() + result.add ";\n" + of cnkIf: + result.add "if (" + recurse() + result.add ") " + recurse() + of cnkSwitch: + result.add "switch (" + recurse() + result.add ") {\n" + for _ in 1.. 1: + result.add ", " + recurse() + result.add ")" + of cnkPtrDecl: + result.add "*" + recurse() + of cnkArrayDecl: + recurse() + result.add "[" + if n.len == 2: + recurse() + result.add "]" + of cnkDeclList: + result.add "{\n" + recurse() + result.add "}\n" + of cnkStructSpec: + result.add "struct " + foreach(n): + recurse() + of cnkUnionSpec: + result.add "union " + foreach(n): + recurse() + + # directives: + of cnkEmit: + # just format whatever is provided as the arguments + foreach(n): + recurse() + +proc format*(g: CodeGenEnv, ast: CombinedCAst, i: CNodeIndex, + result: var string) = + ## Formats `ast` starting at `i` into as textual C code, appending the + ## result to `result`. + var i = ord(i) + format(g, ast, result, i) From 6db0d2a007f1d036a5c88db11d4e6d8bff70748b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:36 +0000 Subject: [PATCH 03/18] add some temporary profiling facilities They're meant to be easy to use and have low overhead. 
--- compiler/utils/measure.nim | 85 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 compiler/utils/measure.nim diff --git a/compiler/utils/measure.nim b/compiler/utils/measure.nim new file mode 100644 index 00000000000..30d9f7866f5 --- /dev/null +++ b/compiler/utils/measure.nim @@ -0,0 +1,85 @@ +## Temporary profiling facilities. + +import std/[monotimes, exitprocs, times, db_sqlite] + +type Entry = object + name: string + count: int + time: Duration + alloc: AllocStats + +var counter {.compileTime.} = 0 +# allocate the sequence on the heap, to make sure it doesn't get destroyed +# prior to the exit proc being called +var storage {.noinit.}: ptr seq[Entry] + +template data*(): seq[Entry] = + storage[] + +proc register(i: int, name: string): int = + if storage.isNil: + storage = create(seq[Entry]) + + data.setLen(max(data.len, i + 1)) + data[i].name = name + i + +proc id(name: static string): int = + # compute a unique, 0-based ID for the name + const x = counter + static: inc counter + # we use a lifted global for running some ad-hoc code at startup + let ignore {.global, used.} = register(x, name) + result = x + +# the fields are not exported :( +template alloc*(s: AllocStats): int = + cast[ptr array[2, int]](addr s)[][0] +template dealloc*(s: AllocStats): int = + cast[ptr array[2, int]](addr s)[][1] + +proc `+=`(a: var AllocStats, b: AllocStats) {.inline.} = + a.alloc += b.alloc + a.dealloc += b.dealloc + +proc finish(id: int, time: Duration, stats: AllocStats) = + data[id].alloc += getAllocStats() - stats + data[id].time += time + inc data[id].count + +template measure*(name: static string) = + # needs to have as little overhead as possible (e.g., no costly table + # lookups) + let + start = getMonoTime() + stats = getAllocStats() + + defer: finish(id(name), getMonoTime() - start, stats) + +proc dump() = + echo "---- Measurements:" + for it in data.items: + if it.count > 0: + echo "'", it.name, "' took ", 
(it.time.inMilliseconds.int / 1000), "s (average: ", (it.time.inMicroseconds.int / it.count / 1000), "ms runs: ", it.count, ")" + when defined(nimAllocStats): + echo " allocations: ", $it.alloc + + # write to an sqlite DB, for easier analysis later on + var db = open("profile.db", "", "", "") + db.exec(sql"BEGIN IMMEDIATE TRANSACTION") + try: + db.exec(sql"CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, date)") + let run = db.tryInsertID(sql"INSERT INTO runs (date) VALUES (?)", now().format("YYYY-MM-dd HH:mm:ss")) + doAssert run != -1 + db.exec(sql"CREATE TABLE IF NOT EXISTS entries (run INTEGER, name, count, total, alloc, dealloc)") + for it in data.items: + db.exec(sql"INSERT INTO entries (run, name, count, total, alloc, dealloc) VALUES (?,?,?,?,?,?)", + run, it.name, it.count, it.time.inMicroseconds.int, it.alloc.alloc, it.alloc.dealloc) + db.exec(sql"COMMIT") + except: + echo "error: ", getCurrentExceptionMsg() + db.exec(sql"ROLLBACK") + finally: + db.close() + +addExitProc(proc() = dump()) \ No newline at end of file From daa5e54e9d98ca598de7eed7c3d0ef628ef41974 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:37 +0000 Subject: [PATCH 04/18] get a clean slate All relevant C code generator modules are suffixed with a "2", in order to make room for the new modules. They're not yet removed, so that their code can still be referenced easily. 
--- compiler/backend/{cbackend.nim => cbackend2.nim} | 0 compiler/backend/{ccgcalls.nim => ccgcalls2.nim} | 0 compiler/backend/{ccgexprs.nim => ccgexprs2.nim} | 0 compiler/backend/{ccgliterals.nim => ccgliterals2.nim} | 0 compiler/backend/{ccgstmts.nim => ccgstmts2.nim} | 0 compiler/backend/{ccgthreadvars.nim => ccgthreadvars2.nim} | 0 compiler/backend/{ccgtypes.nim => ccgtypes2.nim} | 0 compiler/backend/{cgen.nim => cgen2.nim} | 0 compiler/backend/{cgendata.nim => cgendata2.nim} | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename compiler/backend/{cbackend.nim => cbackend2.nim} (100%) rename compiler/backend/{ccgcalls.nim => ccgcalls2.nim} (100%) rename compiler/backend/{ccgexprs.nim => ccgexprs2.nim} (100%) rename compiler/backend/{ccgliterals.nim => ccgliterals2.nim} (100%) rename compiler/backend/{ccgstmts.nim => ccgstmts2.nim} (100%) rename compiler/backend/{ccgthreadvars.nim => ccgthreadvars2.nim} (100%) rename compiler/backend/{ccgtypes.nim => ccgtypes2.nim} (100%) rename compiler/backend/{cgen.nim => cgen2.nim} (100%) rename compiler/backend/{cgendata.nim => cgendata2.nim} (100%) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend2.nim similarity index 100% rename from compiler/backend/cbackend.nim rename to compiler/backend/cbackend2.nim diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls2.nim similarity index 100% rename from compiler/backend/ccgcalls.nim rename to compiler/backend/ccgcalls2.nim diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs2.nim similarity index 100% rename from compiler/backend/ccgexprs.nim rename to compiler/backend/ccgexprs2.nim diff --git a/compiler/backend/ccgliterals.nim b/compiler/backend/ccgliterals2.nim similarity index 100% rename from compiler/backend/ccgliterals.nim rename to compiler/backend/ccgliterals2.nim diff --git a/compiler/backend/ccgstmts.nim b/compiler/backend/ccgstmts2.nim similarity index 100% rename from compiler/backend/ccgstmts.nim rename to 
compiler/backend/ccgstmts2.nim diff --git a/compiler/backend/ccgthreadvars.nim b/compiler/backend/ccgthreadvars2.nim similarity index 100% rename from compiler/backend/ccgthreadvars.nim rename to compiler/backend/ccgthreadvars2.nim diff --git a/compiler/backend/ccgtypes.nim b/compiler/backend/ccgtypes2.nim similarity index 100% rename from compiler/backend/ccgtypes.nim rename to compiler/backend/ccgtypes2.nim diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen2.nim similarity index 100% rename from compiler/backend/cgen.nim rename to compiler/backend/cgen2.nim diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata2.nim similarity index 100% rename from compiler/backend/cgendata.nim rename to compiler/backend/cgendata2.nim From 1aa5d98543a30ab7e569d17b2080f90e64338070 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:37 +0000 Subject: [PATCH 05/18] restore a basic orchestrator skeleton The general structure is similar to the old `cbackend`, but with two important differences: * the global and per-module types are owned by orchestrator now, not `cgendata` * the output (i.e., the C files) are funnelled through a dedicated type (`Output`) --- compiler/backend/cbackend.nim | 116 ++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 compiler/backend/cbackend.nim diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim new file mode 100644 index 00000000000..e493a231a99 --- /dev/null +++ b/compiler/backend/cbackend.nim @@ -0,0 +1,116 @@ +## The code-generation orchestrator for the C backend. 
+ +import + std/[ + tables + ], + compiler/ast/[ + ast_idgen, + lineinfos + ], + compiler/backend/[ + backends + ], + compiler/mir/[ + mirbodies, + mirenv, + mirtrees + ], + compiler/modules/[ + modulegraphs + ], + compiler/sem/[ + modulelowering + ], + compiler/utils/[ + containers, + idioms, + measure + ] + +type + ModuleId = FileIndex + + BModule = object + ## Per-module data. A ``BModule`` instance usually corresponds to a + ## |NimSkull| module, but doesn't necessarily have to. + idgen: IdGenerator + + BModuleList = object + ## The "top level" type for the orchestrator, owning all state related + ## to code generation. + graph: ModuleGraph + env: MirEnv + + modules: OrdinalSeq[ModuleId, BModule] + + PartialTable = Table[ProcedureId, MirBody] + ## Table for holding the incremental procedures + + Output* = ref object of RootObj + ## The interface with the legacy backend management. + +const NonMagics = {} + +proc processEvent(g: var BModuleList, discovery: DiscoveryData, + partial: var PartialTable, evt: sink BackendEvent) = + discard + +proc assemble(m: Module): string = + ## Combines the various AST fragments of the module and renders them into + ## C code. + +proc generateCode*(graph: ModuleGraph, g: sink BModuleList, + mlist: sink ModuleList): Output = + ## Implements the main part of the C code-generation orchestrator. Expects an + ## already populated ``BModuleList``. Returns the list with all code + ## generation artifacts. 
+ measure("backend") + + # pre-process the init procedures: + for key, m in mlist.modules.pairs: + # TODO: assign the external names for the init procedures + discard + + # ----- main event processing ----- + let + config = BackendConfig(tconfig: TranslationConfig(magicsToKeep: NonMagics)) + + var + discovery: DiscoveryData + partial: PartialTable + + # discover and generate code for all alive entities: + for evt in process(graph, mlist, g.env, discovery, config): + processEvent(g, discovery, partial, evt) + + # finish the partial procedures: + for id, p in partial.pairs: + # TODO: implement me + discard + + # production of the CIR for all alive entities is done + + # TODO: generate the main procedure + # TODO: report the used dynamic libraries + # TODO: generate a header, if requested + + # assemble the final C code for each module: + for id, m in mlist.modules.pairs: + discard assemble(m) + # TODO: register in the Output structure + +proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = + ## Entry point for C code generation. Only the C code is generated -- nothing + ## is written to disk yet. + var g = BModuleList(graph: graph, env: initMirEnv(graph)) + + # setup the module entries: + for key, m in mlist.modules.pairs: + # XXX: meh, not a good solution. 
The list should be setup up-front + if m.sym.position >= g.modules.len: + setLen(g.modules, m.sym.position + 1) + g.modules[key] = BModule(idgen: m.idgen) + + # the output is communicated through the module graph + graph.backend = generateCode(graph, g, mlist) From a014eb6af631bbddcaa22ac153a30a1de7cc5b3a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:37 +0000 Subject: [PATCH 06/18] restore the IC integration --- compiler/backend/cbackend.nim | 20 ++++++++++++++------ compiler/ic/cbackend.nim | 16 ++++++---------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index e493a231a99..9062042f427 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -31,18 +31,18 @@ import type ModuleId = FileIndex - BModule = object + BModule* = object ## Per-module data. A ``BModule`` instance usually corresponds to a ## |NimSkull| module, but doesn't necessarily have to. - idgen: IdGenerator + idgen*: IdGenerator - BModuleList = object + BModuleList* = object ## The "top level" type for the orchestrator, owning all state related ## to code generation. graph: ModuleGraph env: MirEnv - modules: OrdinalSeq[ModuleId, BModule] + modules*: OrdinalSeq[ModuleId, BModule] PartialTable = Table[ProcedureId, MirBody] ## Table for holding the incremental procedures @@ -52,6 +52,14 @@ type const NonMagics = {} +proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList = + ## Sets up a backend module-list with `num` modules. 
+ result = BModuleList(graph: graph, env: initMirEnv(graph)) + result.modules.newSeq(num) + +proc initModule*(idgen: IdGenerator): BModule = + BModule(idgen: idgen) + proc processEvent(g: var BModuleList, discovery: DiscoveryData, partial: var PartialTable, evt: sink BackendEvent) = discard @@ -103,14 +111,14 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = ## Entry point for C code generation. Only the C code is generated -- nothing ## is written to disk yet. - var g = BModuleList(graph: graph, env: initMirEnv(graph)) + var g = initModuleList(graph, 0) # setup the module entries: for key, m in mlist.modules.pairs: # XXX: meh, not a good solution. The list should be setup up-front if m.sym.position >= g.modules.len: setLen(g.modules, m.sym.position + 1) - g.modules[key] = BModule(idgen: m.idgen) + g.modules[key] = initModule(m.idgen) # the output is communicated through the module graph graph.backend = generateCode(graph, g, mlist) diff --git a/compiler/ic/cbackend.nim b/compiler/ic/cbackend.nim index a36f6d70b6b..364956188e1 100644 --- a/compiler/ic/cbackend.nim +++ b/compiler/ic/cbackend.nim @@ -27,11 +27,10 @@ import msgs ], compiler/utils/[ + containers, pathutils ], compiler/backend/[ - cgendata, - cgen, extccomp ], compiler/ic/[ @@ -52,9 +51,8 @@ proc unpackTree(g: ModuleGraph; thisModule: int; var decoder = initPackedDecoder(g.config, g.cache) result = loadNodes(decoder, g.packed, thisModule, tree, n) -proc setupBackendModule(g: BModuleList; m: var LoadedModule, alive: AliveSyms) = - var bmod = cgen.newModule(g, m.module, g.config) - bmod.idgen = idgenFromLoadedModule(m) +proc setupBackendModule(g: var BModuleList; m: var LoadedModule, alive: AliveSyms) = + g.modules[m.module.position.FileIndex] = initModule(idgenFromLoadedModule(m)) proc addFileToLink(config: ConfigRef; m: PSym) {.used.} = # XXX: currently unused, but kept in case it is needed again @@ -142,8 +140,7 @@ proc 
generateCode*(g: ModuleGraph) = # setup the module list and allocate space for all existing modules. # The slots for unchanged modules stay uninitialized. - let backend = cgendata.newModuleList(g) - backend.modules.setLen(g.packed.len) + var backend = initModuleList(g, g.packed.len) # Second pass: Setup all the backend modules for all the modules that have # changed: @@ -177,7 +174,7 @@ proc generateCode*(g: ModuleGraph) = let pos = m.module.position - c = pass.open(g, m.module, backend.modules[pos].idgen) + c = pass.open(g, m.module, backend.modules[pos.FileIndex].idgen) for p in allNodes(m.fromDisk.topLevel): let n = unpackTree(g, pos, m.fromDisk.topLevel, p) discard pass.process(c, n) @@ -196,8 +193,7 @@ proc generateCode*(g: ModuleGraph) = break # Fourth pass: Generate the code: - cbackend2.generateCode(g, backend, mlist) - g.backend = backend + g.backend = cbackend2.generateCode(g, backend, mlist) # Last pass: Write the rodfiles to disk. The code generator still modifies # their contents right up to this point, so this step currently cannot happen From 32f6f6483a42d24077cf036db9e0c17ce912d35b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Tue, 4 Jun 2024 23:46:37 +0000 Subject: [PATCH 07/18] cbackend: implement the basic write-to-disk logic It works much like the previous version, but with more generalized support for header files. Compared to before, all the write-to-disk management is now fully handled by the orchestrator, not the code generator (i.e., `cgen`). The compiler compiles again (but the result cannot compile the compiler, for obvious reasons). 
--- compiler/backend/cbackend.nim | 79 +++++++++++++++++++++++++++++++++-- compiler/backend/cgen2.nim | 48 --------------------- compiler/front/main.nim | 7 +--- 3 files changed, 78 insertions(+), 56 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index 9062042f427..a9853085dfe 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -2,14 +2,20 @@ import std/[ + os, tables ], compiler/ast/[ ast_idgen, + ast_types, lineinfos ], compiler/backend/[ - backends + backends, + extccomp + ], + compiler/front/[ + options ], compiler/mir/[ mirbodies, @@ -25,9 +31,20 @@ import compiler/utils/[ containers, idioms, - measure + measure, + pathutils, + ropes ] +# XXX: move toFullPath somewhere else, like ``options`` (where ``ConfigRef`` +# resides) +from compiler/front/msgs import toFullPath, localReport + +# XXX: imports for the legacy reports +import compiler/ast/report_enums +from compiler/ast/reports_sem import SemReport, + reportStr + type ModuleId = FileIndex @@ -47,8 +64,11 @@ type PartialTable = Table[ProcedureId, MirBody] ## Table for holding the incremental procedures - Output* = ref object of RootObj + Output = ref object of RootObj ## The interface with the legacy backend management. 
+ modules: seq[tuple[m: PSym, content: string]] + ## all modules to add to the build, together with their content + headers: seq[tuple[path: AbsoluteFile, content: string]] const NonMagics = {} @@ -103,6 +123,7 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, # TODO: report the used dynamic libraries # TODO: generate a header, if requested + result = Output() # assemble the final C code for each module: for id, m in mlist.modules.pairs: discard assemble(m) @@ -122,3 +143,55 @@ proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = # the output is communicated through the module graph graph.backend = generateCode(graph, g, mlist) + +# --------------- +# output handling + +# XXX: consider moving this to a separate module. It's unrelated to code +# generation orchestration + +proc getCFile(config: ConfigRef, m: PSym): AbsoluteFile = + let p = AbsoluteFile toFullPath(config, m.position.FileIndex) + # XXX: toFullPath should return an AbsoluteFile already + result = changeFileExt(completeCfilePath(config, withPackageName(config, p)), + ".nim.c") + +proc writeFile(config: ConfigRef, cfile: Cfile, code: string): bool = + ## Writes `code` to `cfile`, and returns whether the C file needs to be + ## recompiled. + if optForceFullMake notin config.globalOptions: + if not equalsFile(code, cfile.cname): + if not writeRope(code, cfile.cname): + localReport(config, reportStr(rsemCannotOpenFile, cfile.cname.string)) + result = true + elif fileExists(cfile.obj) and + os.fileNewer(cfile.obj.string, cfile.cname.string): + result = false + else: + result = true + else: + if not writeRope(code, cfile.cname): + localReport(config, reportStr(rsemCannotOpenFile, cfile.cname.string)) + result = true + +proc writeModules*(backend: RootRef, config: ConfigRef) = + ## Writes the files previously collected into `backend` to disk and adds + ## them to the final build. 
+ let output = Output backend + for m, code in output.modules.items: + measure("write module") + let cfile = getCFile(config, m) + var cf = Cfile(nimname: m.name.s, cname: cfile, + obj: completeCfilePath(config, toObjFile(config, cfile)), + flags: {}) + + # write to disk: + if not writeFile(config, cf, code): + cf.flags = {CfileFlag.Cached} # already up-to-date + + # add to the build: + addFileToCompile(config, cf) + + for (path, content) in output.headers: + # nothing to add to the compilation; just write header to disk + discard writeRope(content, path) diff --git a/compiler/backend/cgen2.nim b/compiler/backend/cgen2.nim index 02627a31d1e..83c404d8fb2 100644 --- a/compiler/backend/cgen2.nim +++ b/compiler/backend/cgen2.nim @@ -1149,60 +1149,12 @@ proc writeHeader(m: BModule) = if not writeRope(result, m.filename): localReport(m.config, reportStr(rsemCannotOpenFile, m.filename.string)) -proc getCFile(m: BModule): AbsoluteFile = - result = changeFileExt(completeCfilePath(m.config, withPackageName(m.config, m.cfilename)), ".nim.c") - -proc shouldRecompile(m: BModule; code: Rope, cfile: Cfile): bool = - if optForceFullMake notin m.config.globalOptions: - if not moduleHasChanged(m.g.graph, m.module): - result = false - elif not equalsFile(code, cfile.cname): - when false: - #m.config.symbolFiles == readOnlySf: #isDefined(m.config, "nimdiff"): - if fileExists(cfile.cname): - copyFile(cfile.cname.string, cfile.cname.string & ".backup") - echo "diff ", cfile.cname.string, ".backup ", cfile.cname.string - else: - echo "new file ", cfile.cname.string - if not writeRope(code, cfile.cname): - localReport(m.config, reportStr(rsemCannotOpenFile, cfile.cname.string)) - - result = true - elif fileExists(cfile.obj) and os.fileNewer(cfile.obj.string, cfile.cname.string): - result = false - else: - result = true - else: - if not writeRope(code, cfile.cname): - localReport(m.config, reportStr(rsemCannotOpenFile, cfile.cname.string)) - - result = true - proc finalizeModule*(m: 
BModule) = finishTypeDescriptions(m) proc finalizeMainModule*(m: BModule) = generateThreadVarsSize(m) # TODO: not the job of the code generator -proc writeModule(m: BModule) = - template onExit() = close(m.ndi, m.config) - let cfile = getCFile(m) - var cf = Cfile(nimname: m.module.name.s, cname: cfile, - obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), flags: {}) - var code = genModule(m, cf) - if code != "" or m.config.symbolFiles != disabledSf: - when hasTinyCBackend: - if m.config.cmd == cmdTcc: - tccgen.compileCCode($code, m.config) - onExit() - return - - if not shouldRecompile(m, code, cf): - cf.flags = {CfileFlag.Cached} - - addFileToCompile(m.config, cf) - onExit() - proc cgenWriteModules*(backend: RootRef, config: ConfigRef) = let g = BModuleList(backend) g.config = config diff --git a/compiler/front/main.nim b/compiler/front/main.nim index 42cea67b373..d679d0d3244 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -44,7 +44,6 @@ import ], compiler/backend/[ extccomp, # Calling C compiler - cgen, # C code generation ], compiler/utils/[ platform, # Target platform data @@ -215,14 +214,12 @@ proc commandCompileToC(graph: ModuleGraph) = prepareForCodegen(graph) if conf.symbolFiles == disabledSf: cbackend2.generateCode(graph, graph.takeModuleList()) - cgenWriteModules(graph.backend, conf) else: if isDefined(conf, "nimIcIntegrityChecks"): checkIntegrity(graph) cbackend.generateCode(graph) - # graph.backend can be nil under IC when nothing changed at all: - if graph.backend != nil: - cgenWriteModules(graph.backend, conf) + + writeModules(graph.backend, conf) if conf.cmd != cmdTcc and graph.backend != nil: extccomp.callCCompiler(conf) extccomp.writeJsonBuildInstructions(conf) From ecd2b304a5435799851981a3d5474ddad7ab815a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:40:59 +0000 Subject: [PATCH 08/18] cir: use `CIdentifier` `CNode` erroneously used a raw `uint32` for 
`ident`. --- compiler/backend/cir.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim index 90f6679a355..3461bdc4834 100644 --- a/compiler/backend/cir.nim +++ b/compiler/backend/cir.nim @@ -100,7 +100,7 @@ type ## Node in a flat tree structure. A node is either atomic or not. Atoms ## have no children nodes. Nodes are layed out in depth first fashion. case kind*: CNodeKind - of cnkIdent: ident*: uint32 + of cnkIdent: ident*: CIdentifier of cnkProcSym: prc*: ProcedureId of cnkGlobalSym: global*: GlobalId of cnkConstSym: cnst*: ConstId From f72ef9449ac5713a4556a983ebb40d480194dc34 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:40:59 +0000 Subject: [PATCH 09/18] sketch out the basic `cgen` interface --- compiler/backend/cgen.nim | 30 ++++++++++++++++++++++++++++++ compiler/backend/cgendata.nim | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 compiler/backend/cgen.nim create mode 100644 compiler/backend/cgendata.nim diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim new file mode 100644 index 00000000000..4babdf3fe0d --- /dev/null +++ b/compiler/backend/cgen.nim @@ -0,0 +1,30 @@ +## Implements the main interface of the C code generator. Provides the +## routines for translating MIR bodies to CIR. + +import + compiler/backend/[ + cgendata, + cir + ], + compiler/mir/[ + mirbodies, + mirtrees + ] + +proc genDecl*(g: var CodeGenEnv, id: ProcedureId): CAst = + ## Generates the declaration for the given procedure. + +proc genDecl*(g: var CodeGenEnv, id: GlobalId): CAst = + ## Generates the declaration for the given global binding. + +proc genDecl*(g: var CodeGenEnv, id: ConstId): CAst = + ## Generates the declaration for the given constant. 
+ +proc genProc*(g: var CodeGenEnv, id: ProcedureId, body: sink MirBody): CAst = + ## Generates the full C definition for the given procedure, with body `body`. + +proc genGlobal*(g: var CodeGenEnv, id: GlobalId): CAst = + ## Generates the definitions for the given global. + +proc genConst*(g: var CodeGenEnv, id: ConstId, body: MirTree): CAst = + ## Generates the definition for the given constant, with body `body`. diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim new file mode 100644 index 00000000000..76347a247f3 --- /dev/null +++ b/compiler/backend/cgendata.nim @@ -0,0 +1,34 @@ +## Implements the data types shared across the modules that make up the C code +## generator. + +import + compiler/backend/[ + cir + ], + compiler/mir/[ + mirenv + ], + compiler/ic/[ + bitabs + ] + +type + CodeGenEnv* = object + ## Stores all the contextual state needed for C code generation, such as + ## the external data for the CIR. This is generally information that is + ## not local to single procedures. + ## + ## For convenience of the code generator, the ``MirEnv`` instance is also + ## owned by this type. + env*: MirEnv + + idents: BiTable[string] + ## all identifiers + +func getIdent*(env: CodeGenEnv, ident: CIdentifier): lent string = + env.idents[LitId ident] + +func addIdent*(env: var CodeGenEnv, ident: string): CIdentifier = + ## Adds `ident` to the environment and returns the unique ID to later look + ## it up with. + CIdentifier env.idents.getOrIncl(ident) From 0c6761d9737a53764958d1cc57a6114bdf44be46 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:40:59 +0000 Subject: [PATCH 10/18] cgendata: store the entity names in `CodeGenEnv` The simplest solution for now. Moving them to a separate type might be better, but that can happen later. 
--- compiler/backend/cgendata.nim | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim index 76347a247f3..fbc5bf96b04 100644 --- a/compiler/backend/cgendata.nim +++ b/compiler/backend/cgendata.nim @@ -2,14 +2,21 @@ ## generator. import + std/[ + tables + ], compiler/backend/[ cir ], compiler/mir/[ - mirenv + mirenv, + mirtrees ], compiler/ic/[ bitabs + ], + compiler/utils/[ + containers ] type @@ -25,6 +32,19 @@ type idents: BiTable[string] ## all identifiers + # the names of the various entities are stored here + # XXX: the code generator itself doesn't need access to the names, + # storing them in a separate type might be better architecturally (but + # maybe worse for performance, due to the extra parameter passing?) + procs*: SeqMap[ProcedureId, CIdentifier] + globals*: SeqMap[GlobalId, CIdentifier] + constants*: SeqMap[ConstId, CIdentifier] + # TODO: anonymous constants need to be handled somehow. They use different + # names depending on the module they're placed in, so storing them + # here won't work. A separate type for the names is likely the best + # solution + types*: Table[TypeId, CIdentifier] + func getIdent*(env: CodeGenEnv, ident: CIdentifier): lent string = env.idents[LitId ident] From 3f25ef2203e3dbcce82aa865a92442218ffcc1c9 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:00 +0000 Subject: [PATCH 11/18] cformat: make the module compile Some field names were outdated. 
--- compiler/backend/cformat.nim | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim index 57140c90b98..51b6d41678b 100644 --- a/compiler/backend/cformat.nim +++ b/compiler/backend/cformat.nim @@ -6,7 +6,7 @@ import tables ], compiler/backend/[ - # cgendata, + cgendata, cir ], compiler/mir/[ @@ -42,15 +42,15 @@ proc format(g: CodeGenEnv, ast: CombinedCAst, inc i case n.kind of cnkIdent: - result.add g.getStr(n.ident) + result.add g.getIdent(n.ident) of cnkProcSym: - result.add g.getStr(g.procs[n.prc]) + result.add g.getIdent(g.procs[n.prc]) of cnkGlobalSym: - result.add g.getStr(g.globals[n.global]) + result.add g.getIdent(g.globals[n.global]) of cnkConstSym: - result.add g.getStr(g.consts[n.cnst]) + result.add g.getIdent(g.constants[n.cnst]) of cnkType, cnkWeakType: - result.add g.getStr(g.types[n.typ].name) + result.add g.getIdent(g.types[n.typ]) of cnkCharLit: # TODO: too inefficient result.add '\'' @@ -108,7 +108,7 @@ proc format(g: CodeGenEnv, ast: CombinedCAst, inc i recurse() result.add " " - result.add g.getStr(sym) + result.add g.getIdent(sym) result.add " " recurse() of cnkPrefix: @@ -120,7 +120,7 @@ proc format(g: CodeGenEnv, ast: CombinedCAst, let sym = ast[i].ident inc i recurse() - result.add g.getStr(sym) + result.add g.getIdent(sym) of cnkAsgn: recurse() result.add " = " From 17398c3772a0a29376ac1b1a1e051555d8936ec0 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:00 +0000 Subject: [PATCH 12/18] instrument some key procedures This also includes some mid-end processing, like destructor call optimizations, in order to get a better relative feel for where time is spent. 
--- compiler/backend/backends.nim | 5 +++++ compiler/backend/cbackend.nim | 2 ++ compiler/backend/cformat.nim | 3 +++ compiler/mir/injecthooks.nim | 3 +++ compiler/mir/mirgen.nim | 3 +++ compiler/sem/injectdestructors.nim | 3 +++ 6 files changed, 19 insertions(+) diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim index 157300f4aa1..ef716c7df3e 100644 --- a/compiler/backend/backends.nim +++ b/compiler/backend/backends.nim @@ -51,6 +51,8 @@ import idioms ] +import compiler/utils/measure + export TranslationConfig type @@ -319,6 +321,7 @@ proc preprocess*(queue: var WorkQueue, graph: ModuleGraph, idgen: IdGenerator, ## needed for fully processing the procedure. `module` is the module the ## step was queued from: it's used as the module the next processing is ## queued from. + measure("transf") let prc = env[id] if exfDynamicLib in prc.extFlags: # a procedure imported at runtime, it has no body @@ -374,6 +377,7 @@ proc process(body: var MirBody, prc: PSym, graph: ModuleGraph, of backendNimVm: targetVm of backendInvalid: unreachable() + measure("MIR passes") applyPasses(body, prc, env, graph, target) proc translate*(id: ProcedureId, body: PNode, graph: ModuleGraph, @@ -385,6 +389,7 @@ proc translate*(id: ProcedureId, body: PNode, graph: ModuleGraph, let prc = env[id] if optCursorInference in graph.config.options and shouldInjectDestructorCalls(prc): + measure("cursor inference") # TODO: turn cursor inference into a MIR pass and remove this part computeCursors(prc, body, graph) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index a9853085dfe..f08bc7c236a 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -82,11 +82,13 @@ proc initModule*(idgen: IdGenerator): BModule = proc processEvent(g: var BModuleList, discovery: DiscoveryData, partial: var PartialTable, evt: sink BackendEvent) = + measure("processEvent") discard proc assemble(m: Module): string = ## Combines the various AST fragments 
of the module and renders them into ## C code. + measure("assemble") proc generateCode*(graph: ModuleGraph, g: sink BModuleList, mlist: sink ModuleList): Output = diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim index 51b6d41678b..ddcb261b936 100644 --- a/compiler/backend/cformat.nim +++ b/compiler/backend/cformat.nim @@ -22,6 +22,8 @@ import # have anything to do with options from compiler/front/options import toCChar, makeCString +import compiler/utils/measure + proc format(g: CodeGenEnv, ast: CombinedCAst, result: var string, i: var int) = # efficiency matters! This procedure potentially processes enormous amounts @@ -257,5 +259,6 @@ proc format*(g: CodeGenEnv, ast: CombinedCAst, i: CNodeIndex, result: var string) = ## Formats `ast` starting at `i` into as textual C code, appending the ## result to `result`. + measure("format") var i = ord(i) format(g, ast, result, i) diff --git a/compiler/mir/injecthooks.nim b/compiler/mir/injecthooks.nim index 8c1baca377a..f549d5718d4 100644 --- a/compiler/mir/injecthooks.nim +++ b/compiler/mir/injecthooks.nim @@ -44,6 +44,8 @@ from compiler/sem/injectdestructors import buildVoidCall from compiler/sem/liftdestructors import cyclicType +import compiler/utils/measure + type LocalDiagKind = enum ldkPassCopyToSink ## a copy is introduced in a consume context @@ -292,6 +294,7 @@ proc injectHooks*(body: var MirBody, graph: ModuleGraph, env: var MirEnv, owner: PSym) = ## Adapter for the legacy pass-application pipeline. Once possible, the pass ## needs to be treated as just another MIR pass. 
+ measure("inject hooks") var c = initChangeset(body) injectHooks(body, graph, env, owner, c) body.apply(c) diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim index 1ec2b007290..1d336a25c7d 100644 --- a/compiler/mir/mirgen.nim +++ b/compiler/mir/mirgen.nim @@ -98,6 +98,8 @@ import import std/options as std_options +import compiler/utils/measure + type DestFlag = enum ## Extra information about an assignment destination. The flags are used to @@ -2311,6 +2313,7 @@ proc generateCode*(graph: ModuleGraph, env: var MirEnv, owner: PSym, # XXX: this assertion can currently not be used, as the ``nfTransf`` flag # might no longer be present after the lambdalifting pass #assert nfTransf in body.flags, "transformed AST is expected as input" + measure("AST -> MIR") var c = initCtx(graph, config, owner, move env) c.sp.active = (body, c.sp.map.add(body)) diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim index 2edee0e4c2c..74996f6fba8 100644 --- a/compiler/sem/injectdestructors.nim +++ b/compiler/sem/injectdestructors.nim @@ -113,6 +113,8 @@ import idioms ] +import compiler/utils/measure + type AnalyseCtx = object cfg: DataFlowGraph @@ -672,6 +674,7 @@ proc injectDestructorCalls*(tree: MirTree, g: ModuleGraph, env: var MirEnv, changes: var Changeset) = ## Collapses sink assignments into either copy or move assignments, and ## injects the destroy operations for all entities requiring destruction. + measure("destructors/sink") block: var actx = AnalyseCtx(graph: g, cfg: computeDfg(tree)) From 4383a81c8337c642c5e80e0e06bcf2253d07aa5d Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:01 +0000 Subject: [PATCH 13/18] mirbodies: implement `append` The orchestrator will need it to concatenate partial MIR bodies. 
--- compiler/mir/mirbodies.nim | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/compiler/mir/mirbodies.nim b/compiler/mir/mirbodies.nim index 1105b95b1ae..e2d6e45c1d5 100644 --- a/compiler/mir/mirbodies.nim +++ b/compiler/mir/mirbodies.nim @@ -2,6 +2,9 @@ ## modifying it. import + std/[ + options + ], compiler/ast/[ ast_types ], @@ -62,3 +65,19 @@ func sourceFor*(body: MirBody, n: NodePosition): PNode {.inline.} = func `[]`*(body: MirBody, id: LocalId): lent Local {.inline.} = ## Returns the local corresponding to `id`. body.locals[id] + +func append*(body: var MirBody, other: sink MirBody): NodePosition = + ## Appends `other` to the end of `body`, returning the start position of + ## `other` in `body`. + result = body.code.len.NodePosition + let start = body.locals.merge(other.locals).get(LocalId 0).uint32 + # update the IDs of all local entities (locals and labels) in the source + # body: + for it in other.code.mitems: + if it.kind == mnkLocal: + uint32(it.local) += start + + # merge the source maps: + merge(body.source, other.code, body.source) + # append the code: + body.code.add other.code From 351d0995ae0c161764c41af46381197daeabdb8f Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:01 +0000 Subject: [PATCH 14/18] cbackend: remove `MirEnv` instance from `BModuleList` The MIR environment is owned by the `CodeGenEnv` now. --- compiler/backend/cbackend.nim | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index f08bc7c236a..ee267be0492 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -12,6 +12,7 @@ import ], compiler/backend/[ backends, + cgendata, extccomp ], compiler/front/[ @@ -57,7 +58,6 @@ type ## The "top level" type for the orchestrator, owning all state related ## to code generation. 
graph: ModuleGraph - env: MirEnv modules*: OrdinalSeq[ModuleId, BModule] @@ -74,13 +74,13 @@ const NonMagics = {} proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList = ## Sets up a backend module-list with `num` modules. - result = BModuleList(graph: graph, env: initMirEnv(graph)) + result = BModuleList(graph: graph) result.modules.newSeq(num) proc initModule*(idgen: IdGenerator): BModule = BModule(idgen: idgen) -proc processEvent(g: var BModuleList, discovery: DiscoveryData, +proc processEvent(g: var BModuleList, cg: var CodeGenEnv, partial: var PartialTable, evt: sink BackendEvent) = measure("processEvent") discard @@ -107,12 +107,13 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, config = BackendConfig(tconfig: TranslationConfig(magicsToKeep: NonMagics)) var + cg = CodeGenEnv(env: initMirEnv(graph)) discovery: DiscoveryData partial: PartialTable # discover and generate code for all alive entities: - for evt in process(graph, mlist, g.env, discovery, config): - processEvent(g, discovery, partial, evt) + for evt in process(graph, mlist, cg.env, discovery, config): + processEvent(g, cg, partial, evt) # finish the partial procedures: for id, p in partial.pairs: From d6dec74910dc0ed7141f50827422d450398b632b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:01 +0000 Subject: [PATCH 15/18] cbackend: append assembled C code to output Simple: if assembling produced some code, append it to the output list, otherwise don't. In other words, much like before, no C file is created for modules that don't result in any code. 
--- compiler/backend/cbackend.nim | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index ee267be0492..78ab9987890 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -129,8 +129,9 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, result = Output() # assemble the final C code for each module: for id, m in mlist.modules.pairs: - discard assemble(m) - # TODO: register in the Output structure + let code = assemble(m) + if code.len > 0: + result.modules.add (m.sym, code) proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) = ## Entry point for C code generation. Only the C code is generated -- nothing From d94f9f31b408bcbc98dd57edcbd108a2fd9558c8 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:02 +0000 Subject: [PATCH 16/18] cbackend: sketch out the data layout/ownership Everything only needed within a single module is stored in `BModule`, things that are shared are stored globally (in `BModuleList`). This keeps the scopes of local entities small, and will make it easy to free memory early (by destroying a `BModule` instance once the C code for it has been generated). --- compiler/backend/cbackend.nim | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index 78ab9987890..d1e3b420be1 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -13,6 +13,7 @@ import compiler/backend/[ backends, cgendata, + cir, extccomp ], compiler/front/[ @@ -54,6 +55,15 @@ type ## |NimSkull| module, but doesn't necessarily have to. idgen*: IdGenerator + all: CombinedCAst + ## the C AST of everything part of the module: functions, globals, etc. 
+ procs: seq[tuple[id: ProcedureId, body: CNodeIndex]] + ## all procedures attached to the module (except inline procedures) + globals: seq[tuple[id: GlobalId, def: CNodeIndex]] + ## all globals attached to the module + constants: seq[tuple[id: ConstId, def: CNodeIndex]] + ## all constants attached to the module + BModuleList* = object ## The "top level" type for the orchestrator, owning all state related ## to code generation. @@ -61,6 +71,23 @@ type modules*: OrdinalSeq[ModuleId, BModule] + all: CombinedCAst + ## the C AST of everything not directly attached to a single module, + ## such as declarations, inline procedure bodies, etc. + + inline: Table[ProcedureId, CNodeIndex] + ## inline procedure -> body. Inline procedures are emitted into all C + ## TUs they're used in, so their bodies are stored globally + types: Table[TypeId, tuple[hash: Hash; decl, def: CNodeIndex]] + + # the declarations for the various entities are needed across modules. + # They're generated once and are then cached here + procs: SeqMap[ProcedureId, CNodeIndex] + consts: SeqMap[ConstId, CNodeIndex] + globals: SeqMap[GlobalId, CNodeIndex] + data: Table[DataId, tuple[hash: Hash, node: CNodeIndex]] + ## not all data entries need to be used in practice, so a table is used + PartialTable = Table[ProcedureId, MirBody] ## Table for holding the incremental procedures From e03c6a18b9da9c68c0e254e6418abb88ca69f347 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:02 +0000 Subject: [PATCH 17/18] cbackend: implement `processEvent` and `assemble` Some details are still missing, but the general flow is there. CIR is generated for the various entities, which is then put into either the global or module-local AST. When all CIR was generated, `assemble` gathers everything the TU needs into a single place and renders the result. 
--- compiler/backend/cbackend.nim | 207 ++++++++++++++++++++++++++++++++-- compiler/backend/cir.nim | 32 ++++++ 2 files changed, 232 insertions(+), 7 deletions(-) diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim index d1e3b420be1..b6c6f3e4615 100644 --- a/compiler/backend/cbackend.nim +++ b/compiler/backend/cbackend.nim @@ -2,6 +2,9 @@ import std/[ + algorithm, + hashes, + intsets, os, tables ], @@ -12,7 +15,9 @@ import ], compiler/backend/[ backends, + cformat, cgendata, + cgen, cir, extccomp ], @@ -97,8 +102,25 @@ type ## all modules to add to the build, together with their content headers: seq[tuple[path: AbsoluteFile, content: string]] + UniqueId = distinct uint32 + ## 2 bit namespace, 30 bit ID. Combines procedure, global, const, and data + ## IDs into a single ID type. Falls apart if there are ever more than 2^30 + ## entities per namespace, which seems unlikely. + const NonMagics = {} +template toUnique(x: ProcedureId): UniqueId = + UniqueId((0 shl 30) or uint32(x)) +template toUnique(x: GlobalId): UniqueId = + UniqueId((1 shl 30) or uint32(x)) +template toUnique(x: ConstId): UniqueId = + UniqueId((2 shl 30) or uint32(x)) +template toUnique(x: DataId): UniqueId = + UniqueId((3 shl 30) or uint32(x)) + +template module(g: BModuleList, s: PSym): BModule = + g.modules[s.moduleId.FileIndex] + proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList = ## Sets up a backend module-list with `num` modules. result = BModuleList(graph: graph) @@ -110,12 +132,182 @@ proc initModule*(idgen: IdGenerator): BModule = proc processEvent(g: var BModuleList, cg: var CodeGenEnv, partial: var PartialTable, evt: sink BackendEvent) = measure("processEvent") - discard -proc assemble(m: Module): string = - ## Combines the various AST fragments of the module and renders them into - ## C code. 
+ template append(body: CAst, id, list: untyped) = + let b = body + let m = cg.env[id].moduleId.FileIndex + g.modules[m].list.add (id, g.modules[m].all.append(b)) + + case evt.kind + of bekDiscovered: + if evt.entity.kind == mnkGlobal: + let id = evt.entity.global + append genGlobal(cg, id), id, globals + + of bekModule: + discard "nothing to do" + of bekConstant: + let id = evt.cnst + append genConst(cg, id, cg.env[cg.env.bodies[id]]), id, constants + of bekPartial: + # append to the in-progress body -- code generation happens once complete + discard partial.mgetOrPut(evt.id, MirBody()).append(evt.body) + of bekProcedure: + # TODO: integrate MIR output with ``--showir`` + let code = genProc(cg, evt.id, evt.body) + # TODO: integrate CIR output with ``--showir`` + # TODO: scan the body for referenced types and data; those are generated + # on use + + if cg.env[evt.id].typ.callConv == ccInline: + # add to the global AST + g.inline[evt.id] = g.all.append(code) + else: + append code, evt.id, procs + of bekImported: + # TODO: implement me + discard + +proc assemble(g: BModuleList, cg: CodeGenEnv, m: BModule, + current: ModuleId): string = + ## Gathers everything that needs to be in the final C translation unit (=TU), + ## brings these entities into a stable order, and renders the result into + ## C code. This is the final step for processing module `m`. 
measure("assemble") + type + StructEnt = tuple[hash: Hash, node: CNodeIndex] + ## global entity; order established by structural hash + GlobalEnt = tuple[item: ItemId, node: CNodeIndex] + ## global entity; order established by module + item ID + LocalEnt = tuple[item: int32, node: CNodeIndex] + ## module-local entity; order established by item ID + + var + fwdTypes: seq[StructEnt] + types: seq[StructEnt] + data: seq[StructEnt] + externDecls: seq[GlobalEnt] + defs: seq[LocalEnt] + fwd: seq[GlobalEnt] + inline: seq[GlobalEnt] + procs: seq[LocalEnt] + + symMarker: PackedSet[UniqueId] + typeFwdMarker, typeMarker: PackedSet[TypeId] + + proc scan(g: BModuleList, cg: CodeGenEnv, ast: CombinedCAst, + n: CNodeIndex) {.closure.} = + # XXX: meh, a closure + template guard(id, body: untyped) = + if not containsOrIncl(symMarker, toUnique id): + body + + # TODO: imported symbols and types, as well as header dependencies need to + # be considered here + for it in all(ast, n): + case it.kind + of cnkWeakType: + # only a forward declaration is needed + if not containsOrIncl(typeFwdMarker, it.typ): + let (hash, _, n) = g.types[it.typ] + fwdTypes.add (hash, n) + of cnkType: + if not containsOrIncl(typeMarker, it.typ): + let (hash, n, _) = g.types[it.typ] + types.add (hash, n) + of cnkProcSym: + let s = cg.env[it.prc] + if s.typ.callConv == ccInline: + guard it.prc: + inline.add (s.itemId, g.inline[it.prc]) + elif cg.env[it.prc].moduleId.ModuleId != current: + guard it.prc: + fwd.add (s.itemId, g.procs[it.prc]) + of cnkGlobalSym: + if cg.env[it.global].moduleId.ModuleId != current: + guard it.global: + externDecls.add (cg.env[it.global].itemId, g.globals[it.global]) + of cnkConstSym: + if it.cnst.isAnon(): + let id = extract(it.cnst) + guard id: + data.add g.data[id] + elif cg.env[it.cnst].moduleId.ModuleId != current: + guard it.cnst: + externDecls.add (cg.env[it.cnst].itemId, g.consts[it.cnst]) + else: + discard "not relevant" + + # add the local entities to the lists and scan them 
for their dependencies: + template addAll(src, dst: untyped) = + for (id, n) in src.items: + dst.add (cg.env[id].itemId.item, n) + scan(g, cg, m.all, n) + + addAll(m.procs, procs) + addAll(m.globals, defs) + addAll(m.constants, defs) + + # scan the inline procedures for their dependencies (which might discover + # new inline procedure dependencies) + var i = 0 + while i < inline.len: + scan(g, cg, g.all, inline[i][1]) + inc i + + # scan the types: + i = 0 + while i < types.len: + # TODO: use a dedicated scanning procedure; only types can be referenced + # from types + scan(g, cg, g.all, types[i][1]) + inc i + + # TODO: forward declarations for procedures also need to be pulled in here. + # The most simple (and efficient) solution would be emitting one for + # *every* procedure, though this would result in larger artifacts... + + # ------ + # except for function forward declarations, the content of the TU is known + # now. Sort everything + + proc cmp(a, b: LocalEnt): int = a.item - b.item + proc cmp(a, b: StructEnt): int = a.hash - b.hash + proc cmp(a, b: GlobalEnt): int = + if a.item.module == b.item.module: a.item.item - b.item.item + else: a.item.module - b.item.module + + sort(fwdTypes, cmp) + sort(types, cmp) + sort(data, cmp) + sort(externDecls, cmp) + sort(fwd, cmp) + sort(inline, cmp) + sort(defs, cmp) + sort(procs, cmp) + + # ------ + # sorting is done, now format everything + + # TODO: data entries are super special: their name is based on the final + # position in the module, meaning that we can only now compute it. 
Do + # so + + # TODO: emit the preamble (i.e., "generated by...") + # TODO: emit the includes + + template format(ast: CombinedCAst, list: untyped) = + for (_, it) in list.items: + format(cg, ast, it, result) + + format(g.all, fwdTypes) + format(g.all, types) + format(g.all, data) + format(g.all, externDecls) + format(m.all, defs) + format(g.all, fwd) + format(g.all, inline) + format(m.all, procs) proc generateCode*(graph: ModuleGraph, g: sink BModuleList, mlist: sink ModuleList): Output = @@ -144,8 +336,9 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, # finish the partial procedures: for id, p in partial.pairs: - # TODO: implement me - discard + # generate the code and append to the attached-to module: + let idx = g.module(cg.env[id]).all.append(genProc(cg, id, p)) + g.module(cg.env[id]).procs.add (id, idx) # production of the CIR for all alive entities is done @@ -156,7 +349,7 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList, result = Output() # assemble the final C code for each module: for id, m in mlist.modules.pairs: - let code = assemble(m) + let code = assemble(g, cg, g.modules[id], id) if code.len > 0: result.modules.add (m.sym, code) diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim index 3461bdc4834..06f3a06d95a 100644 --- a/compiler/backend/cir.nim +++ b/compiler/backend/cir.nim @@ -12,6 +12,8 @@ import mirtrees ] +import compiler/utils/measure + type CNodeKind* = enum cnkIdent ## raw identifier @@ -122,3 +124,33 @@ type CombinedCAst* = seq[CNode] ## Finalized AST where expressions and statement are combined. + +func `==`*(a, b: CNodeIndex): bool {.borrow.} +func `<`*(a, b: CNodeIndex): bool {.borrow.} + +iterator all*(ast: CombinedCAst, start: CNodeIndex): CNode = + ## Returns all nodes in the tree starting at `start`. + var i = uint32(start) + var last = i + while i <= last: + let n = ast[i] + yield n + if ord(n.kind) > ord(cnkExpr): # not an atom? 
+ last += n.len + inc i + +proc append*(a: var CombinedCAst, b: sink CAst): CNodeIndex = + ## Combines the expressions and statements of `b` into a single AST and + ## appends the result to `a`. + measure("append") + let off = a.len.uint32 + for it in b.buf[btExpr].mitems: + if it.kind == cnkExpr: + uint32(it.node) += off + for it in b.buf[btStmt].mitems: + if it.kind == cnkExpr: + uint32(it.node) += off + + a.add b.buf[btExpr] + result = a.len.CNodeIndex + a.add b.buf[btStmt] From 8fde6c1ad6f037eaf6316d00d2399e9475437a80 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:41:02 +0000 Subject: [PATCH 18/18] cgen: emit some placeholder AST The genX procedures are expected to output at least *something*, otherwise sadness ensues, so an empty block is temporarily emitted. --- compiler/backend/cgen.nim | 3 +++ compiler/backend/cir.nim | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim index 4babdf3fe0d..b22004e2f70 100644 --- a/compiler/backend/cgen.nim +++ b/compiler/backend/cgen.nim @@ -22,9 +22,12 @@ proc genDecl*(g: var CodeGenEnv, id: ConstId): CAst = proc genProc*(g: var CodeGenEnv, id: ProcedureId, body: sink MirBody): CAst = ## Generates the full C definition for the given procedure, with body `body`. + result.add(cnkBlock) proc genGlobal*(g: var CodeGenEnv, id: GlobalId): CAst = ## Generates the definitions for the given global. + result.add(cnkBlock) proc genConst*(g: var CodeGenEnv, id: ConstId, body: MirTree): CAst = ## Generates the definition for the given constant, with body `body`. 
+ result.add(cnkBlock) diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim index 06f3a06d95a..e493ab8c985 100644 --- a/compiler/backend/cir.nim +++ b/compiler/backend/cir.nim @@ -154,3 +154,7 @@ proc append*(a: var CombinedCAst, b: sink CAst): CNodeIndex = a.add b.buf[btExpr] result = a.len.CNodeIndex a.add b.buf[btStmt] + +proc add*(ast: var CAst, kind: CNodeKind) = + # XXX: temporary procedure + ast.buf[btStmt].add(CNode(kind: kind))