From 03b1de1b138bee4ce6581839df85de5e01db1f1a Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:36 +0000
Subject: [PATCH 01/18] sketch out the initial design for the C IR

---
 compiler/backend/cir.nim | 124 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 compiler/backend/cir.nim

diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim
new file mode 100644
index 00000000000..90f6679a355
--- /dev/null
+++ b/compiler/backend/cir.nim
@@ -0,0 +1,124 @@
+## Implements the intermediate representation the C code generator outputs, as
+## well as routines for producing and querying it.
+##
+## The IR is an abstract syntax tree (=AST) representing pre-preprocessor C
+## code. For the convenience of the code generator, the tree doesn't encode
+## only syntax, but also includes some symbol information. Handling all
+## complexities of C's syntax is explicitly a non-goal. Where reasonable,
+## simplifications are made.
+
+import
+  compiler/mir/[
+    mirtrees
+  ]
+
+type
+  CNodeKind* = enum
+    cnkIdent     ## raw identifier
+    cnkProcSym   ## procedure symbol
+    cnkGlobalSym ## global location symbol
+    cnkConstSym  ## constant location symbol
+    cnkType      ## strong type reference
+    cnkWeakType  ## weak type reference
+
+    cnkCharLit
+    # C has no signed integer literals, but for convenience, we do
+    cnkIntLit
+    cnkUIntLit
+    cnkFloatLit
+    cnkDoubleLit
+    cnkStrLit
+    cnkVerbatim  ## string that's interpreted as raw piece of C code
+
+    cnkExpr ## local reference to an expression AST
+
+    # ------------ end of atoms
+
+    # expressions:
+    cnkAddrOf    ## (operand: expr)
+    cnkDeref     ## (operand: expr)
+    cnkMember    ## (of: expr, member: ident)
+    cnkPtrMember ## (of: expr, member: ident)
+    cnkArrMember ## (of: expr, index: expr)
+    cnkCast      ## (typ: typeexpr, opr: expr)
+    # XXX: it's likely easier/better to use dedicated node kinds for all
+    #      needed operators
+    cnkPrefix    ## (name: ident, a: expr)
+    cnkInfix     ## (name: ident, a: expr, b: expr)
+    cnkPostfix   ## (name: ident, a: expr)
+
+    cnkCall      ## (callee: expr, args...: expr)
+    cnkTernary   ## (cond: expr, a: expr, b: expr)
+    # an assignment is an expression in C
+    cnkAsgn      ## (lhs: expr, rhs: expr)
+    cnkBraced    ## (items...: expr)
+
+    # statements
+    cnkStmt    ## (e: expr)
+    cnkGoto    ## (label: ident)
+    cnkLabel   ## (label: ident)
+    cnkBlock   ## (body+: stmt)
+    cnkIf      ## (cond: expr, body: stmt)
+    cnkWhile   ## (cond: expr, body: stmt)
+    cnkReturn  ## (operand?: expr)
+    cnkSwitch  ## (selector: expr, branch+:case|default)
+    cnkCase    ## (value: expr, body: stmt)
+    cnkDefault ## (body: stmt)
+    # TODO: an asm statement is missing
+
+    # declaration grammar:
+    # doesn't cover everything that C supports, and also has to support macros
+    cnkSpecList    ## (spec+: spec)
+    cnkDeclaration ## (spec: spec, decl: declarator, init?: expr)
+    cnkDefinition  ## (spec: spec, decl: declarator, body: block)
+    cnkParamDecl   ## (spec: spec, decl: declarator)
+
+    cnkArrayDecl   ## (name: ident, len?: expr)
+    cnkPtrDecl     ## (name: ident)
+    cnkFuncDecl    ## (name: ident, params...: decl)
+    cnkDeclList    ## (decl...: declaration)
+
+    cnkStructSpec  ## (attr?: expr, name?: ident, body: decllist)
+    cnkUnionSpec   ## (attr?: expr, name?: ident, body: decllist)
+
+    # directives:
+    cnkEmit  ## emit(args...: expr|stmt|verbatim)
+
+const
+  cnkAtoms      = {cnkIdent .. cnkExpr}
+  cnkWithNodes  = {low(CNodeKind) .. high(CNodeKind)} - cnkAtoms
+
+  cnkWithNumber = {cnkIntLit, cnkUIntLit, cnkFloatLit, cnkDoubleLit}
+  cnkWithString = {cnkStrLit, cnkVerbatim}
+  cnkWithType   = {cnkWeakType, cnkType}
+
+type
+  CNodeIndex* = distinct uint32
+  CIdentifier* = distinct uint32
+
+  CNode* = object
+    ## Node in a flat tree structure. A node is either atomic or not. Atoms
+    ## have no child nodes. Nodes are laid out in depth-first fashion.
+    case kind*: CNodeKind
+    of cnkIdent:      ident*: uint32
+    of cnkProcSym:    prc*: ProcedureId
+    of cnkGlobalSym:  global*: GlobalId
+    of cnkConstSym:   cnst*: ConstId
+    of cnkWithType:   typ*: TypeId
+    of cnkWithString: strId*: StringId
+    of cnkCharLit:    charVal*: char
+    of cnkWithNumber: number*: NumberId
+    of cnkExpr:       node*: CNodeIndex
+    of cnkWithNodes:  len*: uint32
+
+  BufferType = enum
+    btExpr
+    btStmt
+
+  CAst* = object
+    ## In-progress AST. Non-atomic expressions are stored in a separate buffer
+    ## from statements.
+    buf: array[BufferType, seq[CNode]]
+
+  CombinedCAst* = seq[CNode]
+    ## Finalized AST where expressions and statements are combined.

From 2ba2693b272226b3817727d184200b9753a46001 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:36 +0000
Subject: [PATCH 02/18] implement the CIR formatter

There's not much to it. The code could be shortened a bit using
templates, but that can happen at a later point.

The definition of `CodeGenEnv` is hand-waved into the future.
---
 compiler/backend/cformat.nim | 261 +++++++++++++++++++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100644 compiler/backend/cformat.nim

diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim
new file mode 100644
index 00000000000..57140c90b98
--- /dev/null
+++ b/compiler/backend/cformat.nim
@@ -0,0 +1,261 @@
+## Implements the formatter that turns the CIR (`cir <#cir>`_) into textual C
+## code.
+
+import
+  std/[
+    tables
+  ],
+  compiler/backend/[
+    # cgendata,
+    cir
+  ],
+  compiler/mir/[
+    mirenv,
+    mirtrees
+  ],
+  compiler/utils/[
+    containers,
+    idioms
+  ]
+
+# TODO: move these formatting procedures here (or somewhere else) -- they don't
+#       have anything to do with options
+from compiler/front/options import toCChar, makeCString
+
+proc format(g: CodeGenEnv, ast: CombinedCAst,
+            result: var string, i: var int) =
+  # efficiency matters! This procedure potentially processes enormous amounts
+  # of data, and should thus be as fast as possible
+  # TODO: look into structuring `format` such that the C compiler can use tail
+  #       calls
+  # TODO: indentation is not handled
+  template recurse() =
+    format(g, ast, result, i)
+
+  template foreach(n: CNode, body: untyped) =
+    for i in 0..<n.len:
+      body
+
+  # TODO: parentheses are currently used to prevent precedence issues. Look into
+  #       some way to efficiently detect where the parentheses can be omitted
+  let n = ast[i]
+  inc i
+  case n.kind
+  of cnkIdent:
+    result.add g.getStr(n.ident)
+  of cnkProcSym:
+    result.add g.getStr(g.procs[n.prc])
+  of cnkGlobalSym:
+    result.add g.getStr(g.globals[n.global])
+  of cnkConstSym:
+    result.add g.getStr(g.consts[n.cnst])
+  of cnkType, cnkWeakType:
+    result.add g.getStr(g.types[n.typ].name)
+  of cnkCharLit:
+    # TODO: too inefficient
+    result.add '\''
+    toCChar(n.charVal, result)
+    result.add '\''
+  of cnkIntLit:
+    result.addInt g.env.getInt(n.number)
+  of cnkUIntLit:
+    result.addInt g.env.getUInt(n.number)
+  of cnkFloatLit:
+    result.addFloat g.env.getFloat(n.number)
+    result.add "f"
+  of cnkDoubleLit:
+    result.addFloat g.env.getFloat(n.number)
+  of cnkVerbatim:
+    result.add g.env[n.strId]
+  of cnkStrLit:
+    # TODO: improve; don't allocate a separate string
+    result.add makeCString(g.env[n.strId])
+  of cnkExpr:
+    # temporarily move the cursor
+    var i = ord(n.node)
+    recurse()
+
+  # expressions
+  of cnkAddrOf:
+    result.add "(&"
+    recurse()
+    result.add ")"
+  of cnkDeref:
+    result.add "(*"
+    recurse()
+    result.add ")"
+  of cnkMember:
+    recurse()
+    result.add "."
+    recurse()
+  of cnkPtrMember:
+    recurse()
+    result.add "->"
+    recurse()
+  of cnkArrMember:
+    recurse()
+    result.add "["
+    recurse()
+    result.add "]"
+  of cnkCast:
+    result.add "("
+    recurse()
+    result.add ")("
+    recurse()
+    result.add ")"
+  of cnkInfix:
+    let sym = ast[i].ident
+    inc i
+    recurse()
+    result.add " "
+    result.add g.getStr(sym)
+    result.add " "
+    recurse()
+  of cnkPrefix:
+    recurse()
+    result.add "("
+    recurse()
+    result.add ")"
+  of cnkPostfix:
+    let sym = ast[i].ident
+    inc i
+    recurse()
+    result.add g.getStr(sym)
+  of cnkAsgn:
+    recurse()
+    result.add " = "
+    recurse()
+  of cnkCall:
+    recurse()
+    result.add "("
+    for j in 1..<n.len:
+      if j > 1:
+        result.add ", "
+      recurse()
+    result.add ")"
+  of cnkTernary:
+    result.add "("
+    recurse()
+    result.add " ? "
+    recurse()
+    result.add " : "
+    recurse()
+    result.add ")"
+  of cnkBraced:
+    result.add "{"
+    for j in 0..<n.len:
+      if j > 0:
+        result.add ", "
+      recurse()
+    result.add "}"
+
+  # statements:
+  of cnkStmt:
+    recurse()
+    result.add ";\n"
+  of cnkGoto:
+    result.add "goto "
+    recurse()
+    result.add ";\n"
+  of cnkLabel:
+    recurse()
+    result.add ":;\n"
+  of cnkBlock:
+    result.add "{\n"
+    foreach(n):
+      recurse()
+    result.add "}\n"
+  of cnkWhile:
+    result.add "while ("
+    recurse()
+    result.add ") "
+    recurse()
+  of cnkReturn:
+    if n.len == 0:
+      result.add "return;\n"
+    else:
+      result.add "return "
+      recurse()
+      result.add ";\n"
+  of cnkIf:
+    result.add "if ("
+    recurse()
+    result.add ") "
+    recurse()
+  of cnkSwitch:
+    result.add "switch ("
+    recurse()
+    result.add ") {\n"
+    for _ in 1..<n.len:
+      recurse()
+    result.add "}\n"
+  of cnkCase:
+    result.add "case "
+    recurse() # value
+    result.add ": "; recurse() # body; must be consumed to keep `i` in sync
+  of cnkDefault:
+    result.add "default: "; recurse() # body
+
+  # declaration grammar:
+  of cnkDeclaration:
+    recurse() # specifiers/qualifiers
+    recurse() # declarator
+    if n.len == 3:
+      # optional initializer
+      result.add " = "
+      recurse()
+    result.add ";\n"
+  of cnkParamDecl:
+    recurse() # specifiers/qualifiers
+    recurse() # name
+  of cnkDefinition:
+    recurse() # specifiers/qualifiers
+    recurse() # function declarator
+    result.add " "
+    recurse() # body
+  of cnkSpecList:
+    foreach(n):
+      recurse()
+      result.add "\n"
+  of cnkFuncDecl:
+    recurse() # name
+    result.add "("
+    for j in 1..<n.len:
+      if j > 1:
+        result.add ", "
+      recurse()
+    result.add ")"
+  of cnkPtrDecl:
+    result.add "*"
+    recurse()
+  of cnkArrayDecl:
+    recurse()
+    result.add "["
+    if n.len == 2:
+      recurse()
+    result.add "]"
+  of cnkDeclList:
+    result.add "{\n"
+    foreach(n): recurse() # zero or more declarations, not exactly one
+    result.add "}\n"
+  of cnkStructSpec:
+    result.add "struct "
+    foreach(n):
+      recurse()
+  of cnkUnionSpec:
+    result.add "union "
+    foreach(n):
+      recurse()
+
+  # directives:
+  of cnkEmit:
+    # just format whatever is provided as the arguments
+    foreach(n):
+      recurse()
+
+proc format*(g: CodeGenEnv, ast: CombinedCAst, i: CNodeIndex,
+             result: var string) =
+  ## Formats `ast` starting at `i` into textual C code, appending the
+  ## result to `result`.
+  var i = ord(i)
+  format(g, ast, result, i)

From 6db0d2a007f1d036a5c88db11d4e6d8bff70748b Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:36 +0000
Subject: [PATCH 03/18] add some temporary profiling facilities

They're meant to be easy to use and have low overhead.
---
 compiler/utils/measure.nim | 85 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 compiler/utils/measure.nim

diff --git a/compiler/utils/measure.nim b/compiler/utils/measure.nim
new file mode 100644
index 00000000000..30d9f7866f5
--- /dev/null
+++ b/compiler/utils/measure.nim
@@ -0,0 +1,85 @@
+## Temporary profiling facilities.
+
+import std/[monotimes, exitprocs, times, db_sqlite]
+
+type Entry = object
+  name: string
+  count: int
+  time: Duration
+  alloc: AllocStats
+
+var counter {.compileTime.} = 0
+# allocate the sequence on the heap, to make sure it doesn't get destroyed
+# prior to the exit proc being called
+var storage {.noinit.}: ptr seq[Entry]
+
+template data*(): seq[Entry] =
+  storage[]
+
+proc register(i: int, name: string): int =
+  if storage.isNil:
+    storage = create(seq[Entry])
+
+  data.setLen(max(data.len, i + 1))
+  data[i].name = name
+  i
+
+proc id(name: static string): int =
+  # compute a unique, 0-based ID for the name
+  const x = counter
+  static: inc counter
+  # we use a lifted global for running some ad-hoc code at startup
+  let ignore {.global, used.} = register(x, name)
+  result = x
+
+# the fields are not exported :(
+template alloc*(s: AllocStats): int =
+  cast[ptr array[2, int]](addr s)[][0]
+template dealloc*(s: AllocStats): int =
+  cast[ptr array[2, int]](addr s)[][1]
+
+proc `+=`(a: var AllocStats, b: AllocStats) {.inline.} =
+  a.alloc += b.alloc
+  a.dealloc += b.dealloc
+
+proc finish(id: int, time: Duration, stats: AllocStats) =
+  data[id].alloc += getAllocStats() - stats
+  data[id].time += time
+  inc data[id].count
+
+template measure*(name: static string) =
+  # needs to have as little overhead as possible (e.g., no costly table
+  # lookups)
+  let
+    start = getMonoTime()
+    stats = getAllocStats()
+
+  defer: finish(id(name), getMonoTime() - start, stats)
+
+proc dump() =
+  echo "---- Measurements:"
+  for it in data.items:
+    if it.count > 0:
+      echo "'", it.name, "' took ", (it.time.inMilliseconds.int / 1000), "s (average: ", (it.time.inMicroseconds.int / it.count / 1000), "ms runs: ", it.count, ")"
+      when defined(nimAllocStats):
+        echo "  allocations: ", $it.alloc
+
+  # write to an sqlite DB, for easier analysis later on
+  var db = open("profile.db", "", "", "")
+  db.exec(sql"BEGIN IMMEDIATE TRANSACTION")
+  try:
+    db.exec(sql"CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, date)")
+    let run = db.tryInsertID(sql"INSERT INTO runs (date) VALUES (?)", now().format("YYYY-MM-dd HH:mm:ss"))
+    doAssert run != -1
+    db.exec(sql"CREATE TABLE IF NOT EXISTS entries (run INTEGER, name, count, total, alloc, dealloc)")
+    for it in data.items:
+      db.exec(sql"INSERT INTO entries (run, name, count, total, alloc, dealloc) VALUES (?,?,?,?,?,?)",
+              run, it.name, it.count, it.time.inMicroseconds.int, it.alloc.alloc, it.alloc.dealloc)
+    db.exec(sql"COMMIT")
+  except:
+    echo "error: ", getCurrentExceptionMsg()
+    db.exec(sql"ROLLBACK")
+  finally:
+    db.close()
+
+addExitProc(proc() = dump())
\ No newline at end of file

From daa5e54e9d98ca598de7eed7c3d0ef628ef41974 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:37 +0000
Subject: [PATCH 04/18] get a clean slate

All relevant C code generator modules are suffixed with a "2", in
order to make room for the new modules. They're not yet removed, so
that their code can still be referenced easily.
---
 compiler/backend/{cbackend.nim => cbackend2.nim}           | 0
 compiler/backend/{ccgcalls.nim => ccgcalls2.nim}           | 0
 compiler/backend/{ccgexprs.nim => ccgexprs2.nim}           | 0
 compiler/backend/{ccgliterals.nim => ccgliterals2.nim}     | 0
 compiler/backend/{ccgstmts.nim => ccgstmts2.nim}           | 0
 compiler/backend/{ccgthreadvars.nim => ccgthreadvars2.nim} | 0
 compiler/backend/{ccgtypes.nim => ccgtypes2.nim}           | 0
 compiler/backend/{cgen.nim => cgen2.nim}                   | 0
 compiler/backend/{cgendata.nim => cgendata2.nim}           | 0
 9 files changed, 0 insertions(+), 0 deletions(-)
 rename compiler/backend/{cbackend.nim => cbackend2.nim} (100%)
 rename compiler/backend/{ccgcalls.nim => ccgcalls2.nim} (100%)
 rename compiler/backend/{ccgexprs.nim => ccgexprs2.nim} (100%)
 rename compiler/backend/{ccgliterals.nim => ccgliterals2.nim} (100%)
 rename compiler/backend/{ccgstmts.nim => ccgstmts2.nim} (100%)
 rename compiler/backend/{ccgthreadvars.nim => ccgthreadvars2.nim} (100%)
 rename compiler/backend/{ccgtypes.nim => ccgtypes2.nim} (100%)
 rename compiler/backend/{cgen.nim => cgen2.nim} (100%)
 rename compiler/backend/{cgendata.nim => cgendata2.nim} (100%)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend2.nim
similarity index 100%
rename from compiler/backend/cbackend.nim
rename to compiler/backend/cbackend2.nim
diff --git a/compiler/backend/ccgcalls.nim b/compiler/backend/ccgcalls2.nim
similarity index 100%
rename from compiler/backend/ccgcalls.nim
rename to compiler/backend/ccgcalls2.nim
diff --git a/compiler/backend/ccgexprs.nim b/compiler/backend/ccgexprs2.nim
similarity index 100%
rename from compiler/backend/ccgexprs.nim
rename to compiler/backend/ccgexprs2.nim
diff --git a/compiler/backend/ccgliterals.nim b/compiler/backend/ccgliterals2.nim
similarity index 100%
rename from compiler/backend/ccgliterals.nim
rename to compiler/backend/ccgliterals2.nim
diff --git a/compiler/backend/ccgstmts.nim b/compiler/backend/ccgstmts2.nim
similarity index 100%
rename from compiler/backend/ccgstmts.nim
rename to compiler/backend/ccgstmts2.nim
diff --git a/compiler/backend/ccgthreadvars.nim b/compiler/backend/ccgthreadvars2.nim
similarity index 100%
rename from compiler/backend/ccgthreadvars.nim
rename to compiler/backend/ccgthreadvars2.nim
diff --git a/compiler/backend/ccgtypes.nim b/compiler/backend/ccgtypes2.nim
similarity index 100%
rename from compiler/backend/ccgtypes.nim
rename to compiler/backend/ccgtypes2.nim
diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen2.nim
similarity index 100%
rename from compiler/backend/cgen.nim
rename to compiler/backend/cgen2.nim
diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata2.nim
similarity index 100%
rename from compiler/backend/cgendata.nim
rename to compiler/backend/cgendata2.nim

From 1aa5d98543a30ab7e569d17b2080f90e64338070 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:37 +0000
Subject: [PATCH 05/18] restore a basic orchestrator skeleton

The general structure is similar to the old `cbackend`, but with two
important differences:
* the global and per-module types are owned by the orchestrator now, not
  `cgendata`
* the output (i.e., the C files) is funnelled through a dedicated type
  (`Output`)
---
 compiler/backend/cbackend.nim | 116 ++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 compiler/backend/cbackend.nim

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
new file mode 100644
index 00000000000..e493a231a99
--- /dev/null
+++ b/compiler/backend/cbackend.nim
@@ -0,0 +1,116 @@
+## The code-generation orchestrator for the C backend.
+
+import
+  std/[
+    tables
+  ],
+  compiler/ast/[
+    ast_idgen,
+    lineinfos
+  ],
+  compiler/backend/[
+    backends
+  ],
+  compiler/mir/[
+    mirbodies,
+    mirenv,
+    mirtrees
+  ],
+  compiler/modules/[
+    modulegraphs
+  ],
+  compiler/sem/[
+    modulelowering
+  ],
+  compiler/utils/[
+    containers,
+    idioms,
+    measure
+  ]
+
+type
+  ModuleId = FileIndex
+
+  BModule = object
+    ## Per-module data. A ``BModule`` instance usually corresponds to a
+    ## |NimSkull| module, but doesn't necessarily have to.
+    idgen: IdGenerator
+
+  BModuleList = object
+    ## The "top level" type for the orchestrator, owning all state related
+    ## to code generation.
+    graph: ModuleGraph
+    env: MirEnv
+
+    modules: OrdinalSeq[ModuleId, BModule]
+
+  PartialTable = Table[ProcedureId, MirBody]
+    ## Table for holding the incremental procedures
+
+  Output* = ref object of RootObj
+    ## The interface with the legacy backend management.
+
+const NonMagics = {}
+
+proc processEvent(g: var BModuleList, discovery: DiscoveryData,
+                  partial: var PartialTable, evt: sink BackendEvent) =
+  discard
+
+proc assemble(m: Module): string =
+  ## Combines the various AST fragments of the module and renders them into
+  ## C code.
+
+proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
+                   mlist: sink ModuleList): Output =
+  ## Implements the main part of the C code-generation orchestrator. Expects an
+  ## already populated ``BModuleList``. Returns the list with all code
+  ## generation artifacts.
+  measure("backend")
+
+  # pre-process the init procedures:
+  for key, m in mlist.modules.pairs:
+    # TODO: assign the external names for the init procedures
+    discard
+
+  # ----- main event processing -----
+  let
+    config = BackendConfig(tconfig: TranslationConfig(magicsToKeep: NonMagics))
+
+  var
+    discovery: DiscoveryData
+    partial:   PartialTable
+
+  # discover and generate code for all alive entities:
+  for evt in process(graph, mlist, g.env, discovery, config):
+    processEvent(g, discovery, partial, evt)
+
+  # finish the partial procedures:
+  for id, p in partial.pairs:
+    # TODO: implement me
+    discard
+
+  # production of the CIR for all alive entities is done
+
+  # TODO: generate the main procedure
+  # TODO: report the used dynamic libraries
+  # TODO: generate a header, if requested
+
+  # assemble the final C code for each module:
+  for id, m in mlist.modules.pairs:
+    discard assemble(m)
+    # TODO: register in the Output structure
+
+proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) =
+  ## Entry point for C code generation. Only the C code is generated -- nothing
+  ## is written to disk yet.
+  var g = BModuleList(graph: graph, env: initMirEnv(graph))
+
+  # setup the module entries:
+  for key, m in mlist.modules.pairs:
+    # XXX: meh, not a good solution. The list should be setup up-front
+    if m.sym.position >= g.modules.len:
+      setLen(g.modules, m.sym.position + 1)
+    g.modules[key] = BModule(idgen: m.idgen)
+
+  # the output is communicated through the module graph
+  graph.backend = generateCode(graph, g, mlist)

From a014eb6af631bbddcaa22ac153a30a1de7cc5b3a Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:37 +0000
Subject: [PATCH 06/18] restore the IC integration

---
 compiler/backend/cbackend.nim | 20 ++++++++++++++------
 compiler/ic/cbackend.nim      | 16 ++++++----------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index e493a231a99..9062042f427 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -31,18 +31,18 @@ import
 type
   ModuleId = FileIndex
 
-  BModule = object
+  BModule* = object
     ## Per-module data. A ``BModule`` instance usually corresponds to a
     ## |NimSkull| module, but doesn't necessarily have to.
-    idgen: IdGenerator
+    idgen*: IdGenerator
 
-  BModuleList = object
+  BModuleList* = object
     ## The "top level" type for the orchestrator, owning all state related
     ## to code generation.
     graph: ModuleGraph
     env: MirEnv
 
-    modules: OrdinalSeq[ModuleId, BModule]
+    modules*: OrdinalSeq[ModuleId, BModule]
 
   PartialTable = Table[ProcedureId, MirBody]
     ## Table for holding the incremental procedures
@@ -52,6 +52,14 @@ type
 
 const NonMagics = {}
 
+proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList =
+  ## Sets up a backend module-list with `num` modules.
+  result = BModuleList(graph: graph, env: initMirEnv(graph))
+  result.modules.newSeq(num)
+
+proc initModule*(idgen: IdGenerator): BModule =
+  BModule(idgen: idgen)
+
 proc processEvent(g: var BModuleList, discovery: DiscoveryData,
                   partial: var PartialTable, evt: sink BackendEvent) =
   discard
@@ -103,14 +111,14 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
 proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) =
   ## Entry point for C code generation. Only the C code is generated -- nothing
   ## is written to disk yet.
-  var g = BModuleList(graph: graph, env: initMirEnv(graph))
+  var g = initModuleList(graph, 0)
 
   # setup the module entries:
   for key, m in mlist.modules.pairs:
     # XXX: meh, not a good solution. The list should be setup up-front
     if m.sym.position >= g.modules.len:
       setLen(g.modules, m.sym.position + 1)
-    g.modules[key] = BModule(idgen: m.idgen)
+    g.modules[key] = initModule(m.idgen)
 
   # the output is communicated through the module graph
   graph.backend = generateCode(graph, g, mlist)
diff --git a/compiler/ic/cbackend.nim b/compiler/ic/cbackend.nim
index a36f6d70b6b..364956188e1 100644
--- a/compiler/ic/cbackend.nim
+++ b/compiler/ic/cbackend.nim
@@ -27,11 +27,10 @@ import
     msgs
   ],
   compiler/utils/[
+    containers,
     pathutils
   ],
   compiler/backend/[
-    cgendata,
-    cgen,
     extccomp
   ],
   compiler/ic/[
@@ -52,9 +51,8 @@ proc unpackTree(g: ModuleGraph; thisModule: int;
   var decoder = initPackedDecoder(g.config, g.cache)
   result = loadNodes(decoder, g.packed, thisModule, tree, n)
 
-proc setupBackendModule(g: BModuleList; m: var LoadedModule, alive: AliveSyms) =
-  var bmod = cgen.newModule(g, m.module, g.config)
-  bmod.idgen = idgenFromLoadedModule(m)
+proc setupBackendModule(g: var BModuleList; m: var LoadedModule, alive: AliveSyms) =
+  g.modules[m.module.position.FileIndex] = initModule(idgenFromLoadedModule(m))
 
 proc addFileToLink(config: ConfigRef; m: PSym) {.used.} =
   # XXX: currently unused, but kept in case it is needed again
@@ -142,8 +140,7 @@ proc generateCode*(g: ModuleGraph) =
 
   # setup the module list and allocate space for all existing modules.
   # The slots for unchanged modules stay uninitialized.
-  let backend = cgendata.newModuleList(g)
-  backend.modules.setLen(g.packed.len)
+  var backend = initModuleList(g, g.packed.len)
 
   # Second pass: Setup all the backend modules for all the modules that have
   # changed:
@@ -177,7 +174,7 @@ proc generateCode*(g: ModuleGraph) =
 
     let
       pos = m.module.position
-      c = pass.open(g, m.module, backend.modules[pos].idgen)
+      c = pass.open(g, m.module, backend.modules[pos.FileIndex].idgen)
     for p in allNodes(m.fromDisk.topLevel):
       let n = unpackTree(g, pos, m.fromDisk.topLevel, p)
       discard pass.process(c, n)
@@ -196,8 +193,7 @@ proc generateCode*(g: ModuleGraph) =
       break
 
   # Fourth pass: Generate the code:
-  cbackend2.generateCode(g, backend, mlist)
-  g.backend = backend
+  g.backend = cbackend2.generateCode(g, backend, mlist)
 
   # Last pass: Write the rodfiles to disk. The code generator still modifies
   # their contents right up to this point, so this step currently cannot happen

From 32f6f6483a42d24077cf036db9e0c17ce912d35b Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:46:37 +0000
Subject: [PATCH 07/18] cbackend: implement the basic write-to-disk logic

It works much like the previous version, but with more generalized
support for header files. Compared to before, all the write-to-disk
management is now fully handled by the orchestrator, not the code
generator (i.e., `cgen`).

The compiler compiles again (but the result cannot compile the
compiler, for obvious reasons).
---
 compiler/backend/cbackend.nim | 79 +++++++++++++++++++++++++++++++++--
 compiler/backend/cgen2.nim    | 48 ---------------------
 compiler/front/main.nim       |  7 +---
 3 files changed, 78 insertions(+), 56 deletions(-)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index 9062042f427..a9853085dfe 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -2,14 +2,20 @@
 
 import
   std/[
+    os,
     tables
   ],
   compiler/ast/[
     ast_idgen,
+    ast_types,
     lineinfos
   ],
   compiler/backend/[
-    backends
+    backends,
+    extccomp
+  ],
+  compiler/front/[
+    options
   ],
   compiler/mir/[
     mirbodies,
@@ -25,9 +31,20 @@ import
   compiler/utils/[
     containers,
     idioms,
-    measure
+    measure,
+    pathutils,
+    ropes
   ]
 
+# XXX: move toFullPath somewhere else, like ``options`` (where ``ConfigRef``
+#      resides)
+from compiler/front/msgs import toFullPath, localReport
+
+# XXX: imports for the legacy reports
+import compiler/ast/report_enums
+from compiler/ast/reports_sem import SemReport,
+  reportStr
+
 type
   ModuleId = FileIndex
 
@@ -47,8 +64,11 @@ type
   PartialTable = Table[ProcedureId, MirBody]
     ## Table for holding the incremental procedures
 
-  Output* = ref object of RootObj
+  Output = ref object of RootObj
     ## The interface with the legacy backend management.
+    modules: seq[tuple[m: PSym, content: string]]
+      ## all modules to add to the build, together with their content
+    headers: seq[tuple[path: AbsoluteFile, content: string]]
 
 const NonMagics = {}
 
@@ -103,6 +123,7 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
   # TODO: report the used dynamic libraries
   # TODO: generate a header, if requested
 
+  result = Output()
   # assemble the final C code for each module:
   for id, m in mlist.modules.pairs:
     discard assemble(m)
@@ -122,3 +143,55 @@ proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) =
 
   # the output is communicated through the module graph
   graph.backend = generateCode(graph, g, mlist)
+
+# ---------------
+# output handling
+
+# XXX: consider moving this to a separate module. It's unrelated to code
+#      generation orchestration
+
+proc getCFile(config: ConfigRef, m: PSym): AbsoluteFile =
+  let p = AbsoluteFile toFullPath(config, m.position.FileIndex)
+  # XXX: toFullPath should return an AbsoluteFile already
+  result = changeFileExt(completeCfilePath(config, withPackageName(config, p)),
+                         ".nim.c")
+
+proc writeFile(config: ConfigRef, cfile: Cfile, code: string): bool =
+  ## Writes `code` to `cfile`, and returns whether the C file needs to be
+  ## recompiled.
+  if optForceFullMake notin config.globalOptions:
+    if not equalsFile(code, cfile.cname):
+      if not writeRope(code, cfile.cname):
+        localReport(config, reportStr(rsemCannotOpenFile, cfile.cname.string))
+      result = true
+    elif fileExists(cfile.obj) and
+         os.fileNewer(cfile.obj.string, cfile.cname.string):
+      result = false
+    else:
+      result = true
+  else:
+    if not writeRope(code, cfile.cname):
+      localReport(config, reportStr(rsemCannotOpenFile, cfile.cname.string))
+    result = true
+
+proc writeModules*(backend: RootRef, config: ConfigRef) =
+  ## Writes the files previously collected into `backend` to disk and adds
+  ## them to the final build.
+  let output = Output backend
+  for m, code in output.modules.items:
+    measure("write module")
+    let cfile = getCFile(config, m)
+    var cf = Cfile(nimname: m.name.s, cname: cfile,
+                   obj: completeCfilePath(config, toObjFile(config, cfile)),
+                   flags: {})
+
+    # write to disk:
+    if not writeFile(config, cf, code):
+      cf.flags = {CfileFlag.Cached} # already up-to-date
+
+    # add to the build:
+    addFileToCompile(config, cf)
+
+  for (path, content) in output.headers:
+    # nothing to add to the compilation; just write header to disk
+    discard writeRope(content, path)
diff --git a/compiler/backend/cgen2.nim b/compiler/backend/cgen2.nim
index 02627a31d1e..83c404d8fb2 100644
--- a/compiler/backend/cgen2.nim
+++ b/compiler/backend/cgen2.nim
@@ -1149,60 +1149,12 @@ proc writeHeader(m: BModule) =
   if not writeRope(result, m.filename):
     localReport(m.config, reportStr(rsemCannotOpenFile, m.filename.string))
 
-proc getCFile(m: BModule): AbsoluteFile =
-  result = changeFileExt(completeCfilePath(m.config, withPackageName(m.config, m.cfilename)), ".nim.c")
-
-proc shouldRecompile(m: BModule; code: Rope, cfile: Cfile): bool =
-  if optForceFullMake notin m.config.globalOptions:
-    if not moduleHasChanged(m.g.graph, m.module):
-      result = false
-    elif not equalsFile(code, cfile.cname):
-      when false:
-        #m.config.symbolFiles == readOnlySf: #isDefined(m.config, "nimdiff"):
-        if fileExists(cfile.cname):
-          copyFile(cfile.cname.string, cfile.cname.string & ".backup")
-          echo "diff ", cfile.cname.string, ".backup ", cfile.cname.string
-        else:
-          echo "new file ", cfile.cname.string
-      if not writeRope(code, cfile.cname):
-        localReport(m.config, reportStr(rsemCannotOpenFile, cfile.cname.string))
-
-      result = true
-    elif fileExists(cfile.obj) and os.fileNewer(cfile.obj.string, cfile.cname.string):
-      result = false
-    else:
-      result = true
-  else:
-    if not writeRope(code, cfile.cname):
-      localReport(m.config, reportStr(rsemCannotOpenFile, cfile.cname.string))
-
-    result = true
-
 proc finalizeModule*(m: BModule) =
   finishTypeDescriptions(m)
 
 proc finalizeMainModule*(m: BModule) =
   generateThreadVarsSize(m) # TODO: not the job of the code generator
 
-proc writeModule(m: BModule) =
-  template onExit() = close(m.ndi, m.config)
-  let cfile = getCFile(m)
-  var cf = Cfile(nimname: m.module.name.s, cname: cfile,
-                  obj: completeCfilePath(m.config, toObjFile(m.config, cfile)), flags: {})
-  var code = genModule(m, cf)
-  if code != "" or m.config.symbolFiles != disabledSf:
-    when hasTinyCBackend:
-      if m.config.cmd == cmdTcc:
-        tccgen.compileCCode($code, m.config)
-        onExit()
-        return
-
-    if not shouldRecompile(m, code, cf):
-      cf.flags = {CfileFlag.Cached}
-
-    addFileToCompile(m.config, cf)
-  onExit()
-
 proc cgenWriteModules*(backend: RootRef, config: ConfigRef) =
   let g = BModuleList(backend)
   g.config = config
diff --git a/compiler/front/main.nim b/compiler/front/main.nim
index 42cea67b373..d679d0d3244 100644
--- a/compiler/front/main.nim
+++ b/compiler/front/main.nim
@@ -44,7 +44,6 @@ import
   ],
   compiler/backend/[
     extccomp,    # Calling C compiler
-    cgen,        # C code generation
   ],
   compiler/utils/[
     platform,    # Target platform data
@@ -215,14 +214,12 @@ proc commandCompileToC(graph: ModuleGraph) =
   prepareForCodegen(graph)
   if conf.symbolFiles == disabledSf:
     cbackend2.generateCode(graph, graph.takeModuleList())
-    cgenWriteModules(graph.backend, conf)
   else:
     if isDefined(conf, "nimIcIntegrityChecks"):
       checkIntegrity(graph)
     cbackend.generateCode(graph)
-    # graph.backend can be nil under IC when nothing changed at all:
-    if graph.backend != nil:
-      cgenWriteModules(graph.backend, conf)
+
+  writeModules(graph.backend, conf)
   if conf.cmd != cmdTcc and graph.backend != nil:
     extccomp.callCCompiler(conf)
     extccomp.writeJsonBuildInstructions(conf)

From ecd2b304a5435799851981a3d5474ddad7ab815a Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:40:59 +0000
Subject: [PATCH 08/18] cir: use `CIdentifier`

`CNode` erroneously used a raw `uint32` for `ident`.
---
 compiler/backend/cir.nim | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim
index 90f6679a355..3461bdc4834 100644
--- a/compiler/backend/cir.nim
+++ b/compiler/backend/cir.nim
@@ -100,7 +100,7 @@ type
     ## Node in a flat tree structure. A node is either atomic or not. Atoms
     ## have no children nodes. Nodes are layed out in depth first fashion.
     case kind*: CNodeKind
-    of cnkIdent:      ident*: uint32
+    of cnkIdent:      ident*: CIdentifier
     of cnkProcSym:    prc*: ProcedureId
     of cnkGlobalSym:  global*: GlobalId
     of cnkConstSym:   cnst*: ConstId

From f72ef9449ac5713a4556a983ebb40d480194dc34 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:40:59 +0000
Subject: [PATCH 09/18] sketch out the basic `cgen` interface

---
 compiler/backend/cgen.nim     | 30 ++++++++++++++++++++++++++++++
 compiler/backend/cgendata.nim | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 compiler/backend/cgen.nim
 create mode 100644 compiler/backend/cgendata.nim

diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim
new file mode 100644
index 00000000000..4babdf3fe0d
--- /dev/null
+++ b/compiler/backend/cgen.nim
@@ -0,0 +1,30 @@
+## Implements the main interface of the C code generator. Provides the
+## routines for translating MIR bodies to CIR.
+
+import
+  compiler/backend/[
+    cgendata,
+    cir
+  ],
+  compiler/mir/[
+    mirbodies,
+    mirtrees
+  ]
+
+proc genDecl*(g: var CodeGenEnv, id: ProcedureId): CAst =
+  ## Generates the declaration for the given procedure.
+
+proc genDecl*(g: var CodeGenEnv, id: GlobalId): CAst =
+  ## Generates the declaration for the given global binding.
+
+proc genDecl*(g: var CodeGenEnv, id: ConstId): CAst =
+  ## Generates the declaration for the given constant.
+
+proc genProc*(g: var CodeGenEnv, id: ProcedureId, body: sink MirBody): CAst =
+  ## Generates the full C definition for the given procedure, with body `body`.
+
+proc genGlobal*(g: var CodeGenEnv, id: GlobalId): CAst =
+  ## Generates the definitions for the given global.
+
+proc genConst*(g: var CodeGenEnv, id: ConstId, body: MirTree): CAst =
+  ## Generates the definition for the given constant, with body `body`.
diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim
new file mode 100644
index 00000000000..76347a247f3
--- /dev/null
+++ b/compiler/backend/cgendata.nim
@@ -0,0 +1,34 @@
+## Implements the data types shared across the modules that make up the C code
+## generator.
+
+import
+  compiler/backend/[
+    cir
+  ],
+  compiler/mir/[
+    mirenv
+  ],
+  compiler/ic/[
+    bitabs
+  ]
+
+type
+  CodeGenEnv* = object
+    ## Stores all the contextual state needed for C code generation, such as
+    ## the external data for the CIR. This is generally information that is
+    ## not local to single procedures.
+    ##
+    ## For convenience of the code generator, the ``MirEnv`` instance is also
+    ## owned by this type.
+    env*: MirEnv
+
+    idents: BiTable[string]
+      ## all identifiers
+
+func getIdent*(env: CodeGenEnv, ident: CIdentifier): lent string =
+  env.idents[LitId ident]
+
+func addIdent*(env: var CodeGenEnv, ident: string): CIdentifier =
+  ## Adds `ident` to the environment and returns the unique ID to later look
+  ## it up with.
+  CIdentifier env.idents.getOrIncl(ident)

From 0c6761d9737a53764958d1cc57a6114bdf44be46 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:40:59 +0000
Subject: [PATCH 10/18] cgendata: store the entity names in `CodeGenEnv`

The simplest solution for now. Moving them to a separate type might be
better, but that can happen later.
---
 compiler/backend/cgendata.nim | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/compiler/backend/cgendata.nim b/compiler/backend/cgendata.nim
index 76347a247f3..fbc5bf96b04 100644
--- a/compiler/backend/cgendata.nim
+++ b/compiler/backend/cgendata.nim
@@ -2,14 +2,21 @@
 ## generator.
 
 import
+  std/[
+    tables
+  ],
   compiler/backend/[
     cir
   ],
   compiler/mir/[
-    mirenv
+    mirenv,
+    mirtrees
   ],
   compiler/ic/[
     bitabs
+  ],
+  compiler/utils/[
+    containers
   ]
 
 type
@@ -25,6 +32,19 @@ type
     idents: BiTable[string]
       ## all identifiers
 
+    # the names of the various entities are stored here
+    # XXX: the code generator itself doesn't need access to the names,
+    #      storing them in a separate type might be better architecturally (but
+    #      maybe worse for performance, due to the extra parameter passing?)
+    procs*: SeqMap[ProcedureId, CIdentifier]
+    globals*: SeqMap[GlobalId, CIdentifier]
+    constants*: SeqMap[ConstId, CIdentifier]
+    # TODO: anonymous constants need to be handled somehow. They use different
+    #       names depending on the module they're placed in, so storing them
+    #       here won't work. A separate type for the names is likely the best
+    #       solution
+    types*: Table[TypeId, CIdentifier]
+
 func getIdent*(env: CodeGenEnv, ident: CIdentifier): lent string =
   env.idents[LitId ident]
 

From 3f25ef2203e3dbcce82aa865a92442218ffcc1c9 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:00 +0000
Subject: [PATCH 11/18] cformat: make the module compile

Some field names were outdated.
---
 compiler/backend/cformat.nim | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim
index 57140c90b98..51b6d41678b 100644
--- a/compiler/backend/cformat.nim
+++ b/compiler/backend/cformat.nim
@@ -6,7 +6,7 @@ import
     tables
   ],
   compiler/backend/[
-    # cgendata,
+    cgendata,
     cir
   ],
   compiler/mir/[
@@ -42,15 +42,15 @@ proc format(g: CodeGenEnv, ast: CombinedCAst,
   inc i
   case n.kind
   of cnkIdent:
-    result.add g.getStr(n.ident)
+    result.add g.getIdent(n.ident)
   of cnkProcSym:
-    result.add g.getStr(g.procs[n.prc])
+    result.add g.getIdent(g.procs[n.prc])
   of cnkGlobalSym:
-    result.add g.getStr(g.globals[n.global])
+    result.add g.getIdent(g.globals[n.global])
   of cnkConstSym:
-    result.add g.getStr(g.consts[n.cnst])
+    result.add g.getIdent(g.constants[n.cnst])
   of cnkType, cnkWeakType:
-    result.add g.getStr(g.types[n.typ].name)
+    result.add g.getIdent(g.types[n.typ])
   of cnkCharLit:
     # TODO: too inefficient
     result.add '\''
@@ -108,7 +108,7 @@ proc format(g: CodeGenEnv, ast: CombinedCAst,
     inc i
     recurse()
     result.add " "
-    result.add g.getStr(sym)
+    result.add g.getIdent(sym)
     result.add " "
     recurse()
   of cnkPrefix:
@@ -120,7 +120,7 @@ proc format(g: CodeGenEnv, ast: CombinedCAst,
     let sym = ast[i].ident
     inc i
     recurse()
-    result.add g.getStr(sym)
+    result.add g.getIdent(sym)
   of cnkAsgn:
     recurse()
     result.add " = "

From 17398c3772a0a29376ac1b1a1e051555d8936ec0 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:00 +0000
Subject: [PATCH 12/18] instrument some key procedures

This also includes some mid-end processing, like destructor call
optimizations, in order to get a better relative feel for where time is
spent.
---
 compiler/backend/backends.nim      | 5 +++++
 compiler/backend/cbackend.nim      | 2 ++
 compiler/backend/cformat.nim       | 3 +++
 compiler/mir/injecthooks.nim       | 3 +++
 compiler/mir/mirgen.nim            | 3 +++
 compiler/sem/injectdestructors.nim | 3 +++
 6 files changed, 19 insertions(+)

diff --git a/compiler/backend/backends.nim b/compiler/backend/backends.nim
index 157300f4aa1..ef716c7df3e 100644
--- a/compiler/backend/backends.nim
+++ b/compiler/backend/backends.nim
@@ -51,6 +51,8 @@ import
     idioms
   ]
 
+import compiler/utils/measure
+
 export TranslationConfig
 
 type
@@ -319,6 +321,7 @@ proc preprocess*(queue: var WorkQueue, graph: ModuleGraph, idgen: IdGenerator,
   ## needed for fully processing the procedure. `module` is the module the
   ## step was queued from: it's used as the module the next processing is
   ## queued from.
+  measure("transf")
   let prc = env[id]
   if exfDynamicLib in prc.extFlags:
     # a procedure imported at runtime, it has no body
@@ -374,6 +377,7 @@ proc process(body: var MirBody, prc: PSym, graph: ModuleGraph,
     of backendNimVm:   targetVm
     of backendInvalid: unreachable()
 
+  measure("MIR passes")
   applyPasses(body, prc, env, graph, target)
 
 proc translate*(id: ProcedureId, body: PNode, graph: ModuleGraph,
@@ -385,6 +389,7 @@ proc translate*(id: ProcedureId, body: PNode, graph: ModuleGraph,
   let prc = env[id]
   if optCursorInference in graph.config.options and
       shouldInjectDestructorCalls(prc):
+    measure("cursor inference")
     # TODO: turn cursor inference into a MIR pass and remove this part
     computeCursors(prc, body, graph)
 
diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index a9853085dfe..f08bc7c236a 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -82,11 +82,13 @@ proc initModule*(idgen: IdGenerator): BModule =
 
 proc processEvent(g: var BModuleList, discovery: DiscoveryData,
                   partial: var PartialTable, evt: sink BackendEvent) =
+  measure("processEvent")
   discard
 
 proc assemble(m: Module): string =
   ## Combines the various AST fragments of the module and renders them into
   ## C code.
+  measure("assemble")
 
 proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
                    mlist: sink ModuleList): Output =
diff --git a/compiler/backend/cformat.nim b/compiler/backend/cformat.nim
index 51b6d41678b..ddcb261b936 100644
--- a/compiler/backend/cformat.nim
+++ b/compiler/backend/cformat.nim
@@ -22,6 +22,8 @@ import
 #       have anything to do with options
 from compiler/front/options import toCChar, makeCString
 
+import compiler/utils/measure
+
 proc format(g: CodeGenEnv, ast: CombinedCAst,
             result: var string, i: var int) =
   # efficiency matters! This procedure potentially processes enormous amounts
@@ -257,5 +259,6 @@ proc format*(g: CodeGenEnv, ast: CombinedCAst, i: CNodeIndex,
              result: var string) =
   ## Formats `ast` starting at `i` into as textual C code, appending the
   ## result to `result`.
+  measure("format")
   var i = ord(i)
   format(g, ast, result, i)
diff --git a/compiler/mir/injecthooks.nim b/compiler/mir/injecthooks.nim
index 8c1baca377a..f549d5718d4 100644
--- a/compiler/mir/injecthooks.nim
+++ b/compiler/mir/injecthooks.nim
@@ -44,6 +44,8 @@ from compiler/sem/injectdestructors import buildVoidCall
 
 from compiler/sem/liftdestructors import cyclicType
 
+import compiler/utils/measure
+
 type
   LocalDiagKind = enum
     ldkPassCopyToSink       ## a copy is introduced in a consume context
@@ -292,6 +294,7 @@ proc injectHooks*(body: var MirBody, graph: ModuleGraph, env: var MirEnv,
                   owner: PSym) =
   ## Adapter for the legacy pass-application pipeline. Once possible, the pass
   ## needs to be treated as just another MIR pass.
+  measure("inject hooks")
   var c = initChangeset(body)
   injectHooks(body, graph, env, owner, c)
   body.apply(c)
diff --git a/compiler/mir/mirgen.nim b/compiler/mir/mirgen.nim
index 1ec2b007290..1d336a25c7d 100644
--- a/compiler/mir/mirgen.nim
+++ b/compiler/mir/mirgen.nim
@@ -98,6 +98,8 @@ import
 
 import std/options as std_options
 
+import compiler/utils/measure
+
 type
   DestFlag = enum
     ## Extra information about an assignment destination. The flags are used to
@@ -2311,6 +2313,7 @@ proc generateCode*(graph: ModuleGraph, env: var MirEnv, owner: PSym,
   # XXX: this assertion can currently not be used, as the ``nfTransf`` flag
   #      might no longer be present after the lambdalifting pass
   #assert nfTransf in body.flags, "transformed AST is expected as input"
+  measure("AST -> MIR")
 
   var c = initCtx(graph, config, owner, move env)
   c.sp.active = (body, c.sp.map.add(body))
diff --git a/compiler/sem/injectdestructors.nim b/compiler/sem/injectdestructors.nim
index 2edee0e4c2c..74996f6fba8 100644
--- a/compiler/sem/injectdestructors.nim
+++ b/compiler/sem/injectdestructors.nim
@@ -113,6 +113,8 @@ import
     idioms
   ]
 
+import compiler/utils/measure
+
 type
   AnalyseCtx = object
     cfg: DataFlowGraph
@@ -672,6 +674,7 @@ proc injectDestructorCalls*(tree: MirTree, g: ModuleGraph, env: var MirEnv,
                             changes: var Changeset) =
   ## Collapses sink assignments into either copy or move assignments, and
   ## injects the destroy operations for all entities requiring destruction.
+  measure("destructors/sink")
   block:
     var
       actx = AnalyseCtx(graph: g, cfg: computeDfg(tree))

From 4383a81c8337c642c5e80e0e06bcf2253d07aa5d Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:01 +0000
Subject: [PATCH 13/18] mirbodies: implement `append`

The orchestrator will need it to concatenate partial MIR bodies.
---
 compiler/mir/mirbodies.nim | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/compiler/mir/mirbodies.nim b/compiler/mir/mirbodies.nim
index 1105b95b1ae..e2d6e45c1d5 100644
--- a/compiler/mir/mirbodies.nim
+++ b/compiler/mir/mirbodies.nim
@@ -2,6 +2,9 @@
 ## modifying it.
 
 import
+  std/[
+    options
+  ],
   compiler/ast/[
     ast_types
   ],
@@ -62,3 +65,19 @@ func sourceFor*(body: MirBody, n: NodePosition): PNode {.inline.} =
 func `[]`*(body: MirBody, id: LocalId): lent Local {.inline.} =
   ## Returns the local corresponding to `id`.
   body.locals[id]
+
+func append*(body: var MirBody, other: sink MirBody): NodePosition =
+  ## Appends `other` to the end of `body`, returning the start position of
+  ## `other` in `body`.
+  result = body.code.len.NodePosition
+  let start = body.locals.merge(other.locals).get(LocalId 0).uint32
+  # update the IDs of all locals referenced in the source body.
+  # NOTE(review): label IDs are left as is here -- confirm they need no fixup
+  for it in other.code.mitems:
+    if it.kind == mnkLocal:
+      uint32(it.local) += start
+
+  # merge the source map of `other` (not that of `body` itself) into `body`:
+  merge(body.source, other.code, other.source)
+  # append the code:
+  body.code.add other.code

From 351d0995ae0c161764c41af46381197daeabdb8f Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:01 +0000
Subject: [PATCH 14/18] cbackend: remove `MirEnv` instance from `BModuleList`

The MIR environment is owned by the `CodeGenEnv` now.
---
 compiler/backend/cbackend.nim | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index f08bc7c236a..ee267be0492 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -12,6 +12,7 @@ import
   ],
   compiler/backend/[
     backends,
+    cgendata,
     extccomp
   ],
   compiler/front/[
@@ -57,7 +58,6 @@ type
     ## The "top level" type for the orchestrator, owning all state related
     ## to code generation.
     graph: ModuleGraph
-    env: MirEnv
 
     modules*: OrdinalSeq[ModuleId, BModule]
 
@@ -74,13 +74,13 @@ const NonMagics = {}
 
 proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList =
   ## Sets up a backend module-list with `num` modules.
-  result = BModuleList(graph: graph, env: initMirEnv(graph))
+  result = BModuleList(graph: graph)
   result.modules.newSeq(num)
 
 proc initModule*(idgen: IdGenerator): BModule =
   BModule(idgen: idgen)
 
-proc processEvent(g: var BModuleList, discovery: DiscoveryData,
+proc processEvent(g: var BModuleList, cg: var CodeGenEnv,
                   partial: var PartialTable, evt: sink BackendEvent) =
   measure("processEvent")
   discard
@@ -107,12 +107,13 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
     config = BackendConfig(tconfig: TranslationConfig(magicsToKeep: NonMagics))
 
   var
+    cg = CodeGenEnv(env: initMirEnv(graph))
     discovery: DiscoveryData
     partial:   PartialTable
 
   # discover and generate code for all alive entities:
-  for evt in process(graph, mlist, g.env, discovery, config):
-    processEvent(g, discovery, partial, evt)
+  for evt in process(graph, mlist, cg.env, discovery, config):
+    processEvent(g, cg, partial, evt)
 
   # finish the partial procedures:
   for id, p in partial.pairs:

From d6dec74910dc0ed7141f50827422d450398b632b Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:01 +0000
Subject: [PATCH 15/18] cbackend: append assembled C code to output

Simple: if assembling produced some code, append it to the output list,
otherwise don't. In other words, much like before, no C file is created
for modules that don't result in any code.
---
 compiler/backend/cbackend.nim | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index ee267be0492..78ab9987890 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -129,8 +129,9 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
   result = Output()
   # assemble the final C code for each module:
   for id, m in mlist.modules.pairs:
-    discard assemble(m)
-    # TODO: register in the Output structure
+    let code = assemble(m)
+    if code.len > 0:
+      result.modules.add (m.sym, code)
 
 proc generateCode*(graph: ModuleGraph, mlist: sink ModuleList) =
   ## Entry point for C code generation. Only the C code is generated -- nothing

From d94f9f31b408bcbc98dd57edcbd108a2fd9558c8 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:02 +0000
Subject: [PATCH 16/18] cbackend: sketch out the data layout/ownership

Everything only needed within a single module is stored in `BModule`,
things that are shared are stored globally (in `BModuleList`).

This keeps the scopes of local entities small, and will make it easy to
free memory early (by destroying a `BModule` instance once the C code
for it has been generated).
---
 compiler/backend/cbackend.nim | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index 78ab9987890..d1e3b420be1 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -13,6 +13,7 @@ import
   compiler/backend/[
     backends,
     cgendata,
+    cir,
     extccomp
   ],
   compiler/front/[
@@ -54,6 +55,15 @@ type
     ## |NimSkull| module, but doesn't necessarily have to.
     idgen*: IdGenerator
 
+    all: CombinedCAst
+      ## the C AST of everything part of the module: functions, globals, etc.
+    procs: seq[tuple[id: ProcedureId, body: CNodeIndex]]
+      ## all procedures attached to the module (except inline procedures)
+    globals: seq[tuple[id: GlobalId, def: CNodeIndex]]
+      ## all globals attached to the module
+    constants: seq[tuple[id: ConstId, def: CNodeIndex]]
+      ## all constants attached to the module
+
   BModuleList* = object
     ## The "top level" type for the orchestrator, owning all state related
     ## to code generation.
@@ -61,6 +71,23 @@ type
 
     modules*: OrdinalSeq[ModuleId, BModule]
 
+    all: CombinedCAst
+      ## the C AST of everything not directly attached to a single module,
+      ## such as declarations, inline procedure bodies, etc.
+
+    inline: Table[ProcedureId, CNodeIndex]
+      ## inline procedure -> body. Inline procedures are emitted into all C
+      ## TUs they're used in, so their bodies are stored globally
+    types: Table[TypeId, tuple[hash: Hash; decl, def: CNodeIndex]]
+
+    # the declarations for the various entities are needed across modules.
+    # They're generated once and are then cached here
+    procs: SeqMap[ProcedureId, CNodeIndex]
+    consts: SeqMap[ConstId, CNodeIndex]
+    globals: SeqMap[GlobalId, CNodeIndex]
+    data: Table[DataId, tuple[hash: Hash, node: CNodeIndex]]
+      ## not all data entries need to be used in practice, so a table is used
+
   PartialTable = Table[ProcedureId, MirBody]
     ## Table for holding the incremental procedures
 

From e03c6a18b9da9c68c0e254e6418abb88ca69f347 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:02 +0000
Subject: [PATCH 17/18] cbackend: implement `processEvent` and `assemble`

Some details are still missing, but the general flow is there. CIR is
generated for the various entities, which is then put into either the
global or module-local AST. Once all CIR has been generated, `assemble`
gathers everything the TU needs into a single place and renders the
result.
---
 compiler/backend/cbackend.nim | 207 ++++++++++++++++++++++++++++++++--
 compiler/backend/cir.nim      |  32 ++++++
 2 files changed, 232 insertions(+), 7 deletions(-)

diff --git a/compiler/backend/cbackend.nim b/compiler/backend/cbackend.nim
index d1e3b420be1..b6c6f3e4615 100644
--- a/compiler/backend/cbackend.nim
+++ b/compiler/backend/cbackend.nim
@@ -2,6 +2,9 @@
 
 import
   std/[
+    algorithm,
+    hashes,
+    intsets,
     os,
     tables
   ],
@@ -12,7 +15,9 @@ import
   ],
   compiler/backend/[
     backends,
+    cformat,
     cgendata,
+    cgen,
     cir,
     extccomp
   ],
@@ -97,8 +102,25 @@ type
       ## all modules to add to the build, together with their content
     headers: seq[tuple[path: AbsoluteFile, content: string]]
 
+  UniqueId = distinct uint32
+    ## 2 bit namespace, 30 bit ID. Combines procedure, global, const, and data
+    ## IDs into a single ID type. Falls apart if there are ever more than 2^30
+    ## entities per namespace, which seems unlikely.
+
 const NonMagics = {}
 
+template toUnique(x: ProcedureId): UniqueId =
+  UniqueId((0 shl 30) or uint32(x))
+template toUnique(x: GlobalId): UniqueId =
+  UniqueId((1 shl 30) or uint32(x))
+template toUnique(x: ConstId): UniqueId =
+  UniqueId((2 shl 30) or uint32(x))
+template toUnique(x: DataId): UniqueId =
+  UniqueId((3 shl 30) or uint32(x))
+
+template module(g: BModuleList, s: PSym): BModule =
+  g.modules[s.moduleId.FileIndex]
+
 proc initModuleList*(graph: ModuleGraph, num: Natural): BModuleList =
   ## Sets up a backend module-list with `num` modules.
   result = BModuleList(graph: graph)
@@ -110,12 +132,182 @@ proc initModule*(idgen: IdGenerator): BModule =
 proc processEvent(g: var BModuleList, cg: var CodeGenEnv,
                   partial: var PartialTable, evt: sink BackendEvent) =
   measure("processEvent")
-  discard
 
-proc assemble(m: Module): string =
-  ## Combines the various AST fragments of the module and renders them into
-  ## C code.
+  template append(body: CAst, id, list: untyped) =
+    let b = body
+    let m = cg.env[id].moduleId.FileIndex
+    g.modules[m].list.add (id, g.modules[m].all.append(b))
+
+  case evt.kind
+  of bekDiscovered:
+    if evt.entity.kind == mnkGlobal:
+      let id = evt.entity.global
+      append genGlobal(cg, id), id, globals
+
+  of bekModule:
+    discard "nothing to do"
+  of bekConstant:
+    let id = evt.cnst
+    append genConst(cg, id, cg.env[cg.env.bodies[id]]), id, constants
+  of bekPartial:
+    # append to the in-progress body -- code generation happens once complete
+    discard partial.mgetOrPut(evt.id, MirBody()).append(evt.body)
+  of bekProcedure:
+    # TODO: integrate MIR output with ``--showir``
+    let code = genProc(cg, evt.id, evt.body)
+    # TODO: integrate CIR output with ``--showir``
+    # TODO: scan the body for referenced types and data; those are generated
+    #       on use
+
+    if cg.env[evt.id].typ.callConv == ccInline:
+      # add to the global AST
+      g.inline[evt.id] = g.all.append(code)
+    else:
+      append code, evt.id, procs
+  of bekImported:
+    # TODO: implement me
+    discard
+
+proc assemble(g: BModuleList, cg: CodeGenEnv, m: BModule,
+              current: ModuleId): string =
+  ## Gathers everything that needs to be in the final C translation unit (=TU),
+  ## brings these entities into a stable order, and renders the result into
+  ## C code. This is the final step for processing module `m`.
   measure("assemble")
+  type
+    StructEnt = tuple[hash: Hash, node: CNodeIndex]
+      ## global entity; order established by structural hash
+    GlobalEnt = tuple[item: ItemId, node: CNodeIndex]
+      ## global entity; order established by module + item ID
+    LocalEnt  = tuple[item: int32, node: CNodeIndex]
+      ## module-local entity; order established by item ID
+
+  var
+    fwdTypes:    seq[StructEnt]
+    types:       seq[StructEnt]
+    data:        seq[StructEnt]
+    externDecls: seq[GlobalEnt]
+    defs:        seq[LocalEnt]
+    fwd:         seq[GlobalEnt]
+    inline:      seq[GlobalEnt]
+    procs:       seq[LocalEnt]
+
+    symMarker: PackedSet[UniqueId]
+    typeFwdMarker, typeMarker: PackedSet[TypeId]
+
+  proc scan(g: BModuleList, cg: CodeGenEnv, ast: CombinedCAst,
+            n: CNodeIndex) {.closure.} =
+    # XXX: meh, a closure
+    template guard(id, body: untyped) =
+      if not containsOrIncl(symMarker, toUnique id):
+        body
+
+    # TODO: imported symbols and types, as well as header dependencies need to
+    #       be considered here
+    for it in all(ast, n):
+      case it.kind
+      of cnkWeakType:
+        # only a forward declaration is needed
+        if not containsOrIncl(typeFwdMarker, it.typ):
+          let t = g.types[it.typ]
+          fwdTypes.add (t.hash, t.decl)
+      of cnkType:
+        if not containsOrIncl(typeMarker, it.typ):
+          let t = g.types[it.typ] # the full definition is required
+          types.add (t.hash, t.def)
+      of cnkProcSym:
+        let s = cg.env[it.prc]
+        if s.typ.callConv == ccInline:
+          guard it.prc:
+            inline.add (s.itemId, g.inline[it.prc])
+        elif s.moduleId.ModuleId != current:
+          guard it.prc:
+            fwd.add (s.itemId, g.procs[it.prc])
+      of cnkGlobalSym:
+        if cg.env[it.global].moduleId.ModuleId != current:
+          guard it.global:
+            externDecls.add (cg.env[it.global].itemId, g.globals[it.global])
+      of cnkConstSym:
+        if it.cnst.isAnon():
+          let id = extract(it.cnst)
+          guard id:
+            data.add g.data[id]
+        elif cg.env[it.cnst].moduleId.ModuleId != current:
+          guard it.cnst:
+            externDecls.add (cg.env[it.cnst].itemId, g.consts[it.cnst])
+      else:
+        discard "not relevant"
+
+  # add the local entities to the lists and scan them for their dependencies:
+  template addAll(src, dst: untyped) =
+    for (id, n) in src.items:
+      dst.add (cg.env[id].itemId.item, n)
+      scan(g, cg, m.all, n)
+
+  addAll(m.procs, procs)
+  addAll(m.globals, defs)
+  addAll(m.constants, defs)
+
+  # scan the inline procedures for their dependencies (which might discover
+  # new inline procedure dependencies)
+  var i = 0
+  while i < inline.len:
+    scan(g, cg, g.all, inline[i][1])
+    inc i
+
+  # scan the types:
+  i = 0
+  while i < types.len:
+    # TODO: use a dedicated scanning procedure; only types can be referenced
+    #       from types
+    scan(g, cg, g.all, types[i][1])
+    inc i
+
+  # TODO: forward declarations for procedures also need to be pulled in here.
+  #       The most simple (and efficient) solution would be emitting one for
+  #       *every* procedure, though this would result in larger artifacts...
+
+  # ------
+  # except for function forward declarations, the content of the TU is known
+  # now. Sort everything
+
+  proc cmp(a, b: LocalEnt): int  = system.cmp(a.item, b.item)
+  proc cmp(a, b: StructEnt): int = system.cmp(a.hash, b.hash) # no overflow
+  proc cmp(a, b: GlobalEnt): int =
+    if a.item.module == b.item.module: system.cmp(a.item.item, b.item.item)
+    else: system.cmp(a.item.module, b.item.module)
+
+  sort(fwdTypes, cmp)
+  sort(types, cmp)
+  sort(data, cmp)
+  sort(externDecls, cmp)
+  sort(fwd, cmp)
+  sort(inline, cmp)
+  sort(defs, cmp)
+  sort(procs, cmp)
+
+  # ------
+  # sorting is done, now format everything
+
+  # TODO: data entries are super special: their name is based on the final
+  #       position in the module, meaning that we can only now compute it. Do
+  #       so
+
+  # TODO: emit the preamble (i.e., "generated by...")
+  # TODO: emit the includes
+
+  template format(ast: CombinedCAst, list: untyped) =
+    for (_, it) in list.items:
+      format(cg, ast, it, result)
+
+  format(g.all, fwdTypes)
+  format(g.all, types)
+  format(g.all, data)
+  format(g.all, externDecls)
+  format(m.all, defs)
+  format(g.all, fwd)
+  format(g.all, inline)
+  format(m.all, procs)
 
 proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
                    mlist: sink ModuleList): Output =
@@ -144,8 +336,9 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
 
   # finish the partial procedures:
   for id, p in partial.pairs:
-    # TODO: implement me
-    discard
+    # generate the code and append to the attached-to module:
+    let idx = g.module(cg.env[id]).all.append(genProc(cg, id, p))
+    g.module(cg.env[id]).procs.add (id, idx)
 
   # production of the CIR for all alive entities is done
 
@@ -156,7 +349,7 @@ proc generateCode*(graph: ModuleGraph, g: sink BModuleList,
   result = Output()
   # assemble the final C code for each module:
   for id, m in mlist.modules.pairs:
-    let code = assemble(m)
+    let code = assemble(g, cg, g.modules[id], id)
     if code.len > 0:
       result.modules.add (m.sym, code)
 
diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim
index 3461bdc4834..06f3a06d95a 100644
--- a/compiler/backend/cir.nim
+++ b/compiler/backend/cir.nim
@@ -12,6 +12,8 @@ import
     mirtrees
   ]
 
+import compiler/utils/measure
+
 type
   CNodeKind* = enum
     cnkIdent     ## raw identifier
@@ -122,3 +124,33 @@ type
 
   CombinedCAst* = seq[CNode]
     ## Finalized AST where expressions and statement are combined.
+
+func `==`*(a, b: CNodeIndex): bool {.borrow.} # reuses the base type's `==`
+func `<`*(a, b: CNodeIndex): bool {.borrow.} # reuses the base type's `<`
+
+iterator all*(ast: CombinedCAst, start: CNodeIndex): CNode =
+  ## Yields every node of the tree rooted at `start`, in storage order.
+  var i = uint32(start)
+  var last = i # index of the last node known to belong to the tree
+  while i <= last:
+    let n = ast[i]
+    yield n
+    if ord(n.kind) > ord(cnkExpr): # not an atom?
+      last += n.len # the range to visit grows by the node's `len`
+    inc i
+
+proc append*(a: var CombinedCAst, b: sink CAst): CNodeIndex =
+  ## Rebases the `cnkExpr` references of `b` by `a.len`, then appends `b`'s
+  ## expressions and statements to `a`. Returns where the statements begin.
+  measure("append")
+  let off = a.len.uint32 # position in `a` where `b`'s expressions will start
+  for it in b.buf[btExpr].mitems:
+    if it.kind == cnkExpr:
+      uint32(it.node) += off
+  for it in b.buf[btStmt].mitems:
+    if it.kind == cnkExpr:
+      uint32(it.node) += off
+
+  a.add b.buf[btExpr]
+  result = a.len.CNodeIndex # taken after the expressions, before the statements
+  a.add b.buf[btStmt]

From 8fde6c1ad6f037eaf6316d00d2399e9475437a80 Mon Sep 17 00:00:00 2001
From: zerbina <100542850+zerbina@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:41:02 +0000
Subject: [PATCH 18/18] cgen: emit some placeholder AST

The genX procedures are expected to output at least *something*,
otherwise sadness ensues, so an empty block is temporarily emitted.
---
 compiler/backend/cgen.nim | 3 +++
 compiler/backend/cir.nim  | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/compiler/backend/cgen.nim b/compiler/backend/cgen.nim
index 4babdf3fe0d..b22004e2f70 100644
--- a/compiler/backend/cgen.nim
+++ b/compiler/backend/cgen.nim
@@ -22,9 +22,12 @@ proc genDecl*(g: var CodeGenEnv, id: ConstId): CAst =
 
 proc genProc*(g: var CodeGenEnv, id: ProcedureId, body: sink MirBody): CAst =
   ## Generates the full C definition for the given procedure, with body `body`.
+  result.add(cnkBlock) # XXX: placeholder (empty block) for now
 
 proc genGlobal*(g: var CodeGenEnv, id: GlobalId): CAst =
   ## Generates the definitions for the given global.
+  result.add(cnkBlock) # XXX: placeholder (empty block) for now
 
 proc genConst*(g: var CodeGenEnv, id: ConstId, body: MirTree): CAst =
   ## Generates the definition for the given constant, with body `body`.
+  result.add(cnkBlock) # XXX: placeholder (empty block) for now
diff --git a/compiler/backend/cir.nim b/compiler/backend/cir.nim
index 06f3a06d95a..e493ab8c985 100644
--- a/compiler/backend/cir.nim
+++ b/compiler/backend/cir.nim
@@ -154,3 +154,7 @@ proc append*(a: var CombinedCAst, b: sink CAst): CNodeIndex =
   a.add b.buf[btExpr]
   result = a.len.CNodeIndex
   a.add b.buf[btStmt]
+
+proc add*(ast: var CAst, kind: CNodeKind) =
+  # XXX: temporary procedure; appends a `kind` node to `ast.buf[btStmt]`
+  ast.buf[btStmt].add(CNode(kind: kind))