bootlin · fstachura · Oct 11, 2024
diff --git a/elixir/data.py b/elixir/data.py
@@ -25,6 +25,8 @@
 import os.path
 import errno
 
+import msgpack._cmsgpack
+
 deflist_regex = re.compile(b'(\d*)(\w)(\d*)(\w),?')
 deflist_macro_regex = re.compile('\dM\d+(\w)')
 
@@ -48,6 +50,36 @@
 
 defTypeD = {v: k for k, v in defTypeR.items()}
 
+defTypeToInt = {  # definition type name -> integer code stored in DefList entries
+    'config': 0,
+    'define': 1,
+    'enum': 2,
+    'enumerator': 3,
+    'function': 4,
+    'label': 5,
+    'macro': 6,
+    'member': 7,
+    'prototype': 8,
+    'struct': 9,
+    'typedef': 10,
+    'union': 11,
+    'variable': 12,
+    'externvar': 13
+}
+
+intToDefType = {v: k for k, v in defTypeToInt.items()}  # inverse of defTypeToInt, used by DefList.iter
+
+familyToInt = {  # file family letter -> integer code stored in DefList entries
+    'A': 0,
+    'B': 1,
+    'C': 2,
+    'D': 3,
+    'K': 4,
+    'M': 5,
+}
+
+intToFamily = {v: k for k, v in familyToInt.items()}  # inverse of familyToInt
+
 ##################################################################################
 
 maxId = 999999999
@@ -56,93 +88,97 @@ class DefList:
     '''Stores associations between a blob ID, a type (e.g., "function"),
         a line number and a file family.
         Also stores in which families the ident exists for faster tests.'''
-    def __init__(self, data=b'#'):
-        self.data, self.families = data.split(b'#')
+    def __init__(self, data: bytes | None = None):
+        if data is not None:
+            parsed_data = msgpack.loads(data)  # payload layout is [entries, families], as written by pack()
+            self.entries = parsed_data[0]
+            self.families = parsed_data[1]
+        else:
+            self.entries = []
+            self.families = ""
 
     def iter(self, dummy=False):
-        # Get all element in a list of sublists and sort them
-        entries = deflist_regex.findall(self.data)
-        entries.sort(key=lambda x:int(x[0]))
-        for id, type, line, family in entries:
-            id = int(id)
-            type = defTypeR [type.decode()]
-            line = int(line)
-            family = family.decode()
-            yield id, type, line, family
+        # Sort the entries by blob ID in place, then yield them with the type and family codes mapped back to names
+
+        self.entries.sort(key=lambda x: x[0])
+
+        for id, type, line, family in self.entries:
+            yield id, intToDefType[type], line, intToFamily[family]
+
         if dummy:
             yield maxId, None, None, None
 
-    def append(self, id, type, line, family):
+    def append(self, id: int, type: str, line: int, family: str):
+        # Types missing from defTypeD are dropped silently; accepted ones are stored
+        # as (id, type code, line, family code) using the integer lookup tables
+
         if type not in defTypeD:
             return
-        p = str(id) + defTypeD[type] + str(line) + family
-        if self.data != b'':
-            p = ',' + p
-        self.data += p.encode()
-        self.add_family(family)
 
-    def pack(self):
-        return self.data + b'#' + self.families
+        self.entries.append((id, defTypeToInt[type], line, familyToInt[family]))
 
-    def add_family(self, family):
-        family = family.encode()
-        if not family in self.families.split(b','):
-            if self.families != b'':
-                family = b',' + family
+        if family not in self.families:  # self.families is a str, so this is a substring test
             self.families += family
 
+    def pack(self):
+        return msgpack.dumps([self.entries, self.families])  # same [entries, families] layout that __init__ unpacks
+
     def get_families(self):
-        return self.families.decode().split(',')
+        return self.families  # a str of concatenated family letters (the old code returned a list)
 
     def get_macros(self):
-        return deflist_macro_regex.findall(self.data.decode()) or ''
+        return [intToFamily[family] for _, typ, _, family in self.entries if typ == defTypeToInt['macro']]
 
 class PathList:
     '''Stores associations between a blob ID and a file path.
         Inserted by update.py sorted by blob ID.'''
-    def __init__(self, data=b''):
-        self.data = data
+    def __init__(self, data: bytes | None=None):
+        if data is not None:
+            # Decoded payload is the sequence of (id, path) pairs written by pack()
+            self.data = msgpack.loads(data)
+        else:
+            self.data = []
 
     def iter(self, dummy=False):
-        for p in self.data.split(b'\n')[:-1]:
-            id, path = p.split(b' ',maxsplit=1)
-            id = int(id)
-            path = path.decode()
+        for id, path in self.data:
             yield id, path
         if dummy:
             yield maxId, None
 
-    def append(self, id, path):
-        p = str(id).encode() + b' ' + path + b'\n'
-        self.data += p
+    def append(self, id: int, path: str):
+        self.data.append((id, path))  # NOTE(review): the old append took a bytes path and iter() decoded it - confirm callers now pass str
 
     def pack(self):
-        return self.data
+        return msgpack.dumps(self.data)
 
 class RefList:
     '''Stores a mapping from blob ID to list of lines
         and the corresponding family.'''
-    def __init__(self, data=b''):
-        self.data = data
+    def __init__(self, data=None):
+        # {blob_id: {family: [line]}}
+        if data is not None:
+            self.data = msgpack.loads(data, strict_map_key=False)
+        else:
+            self.data = {}
 
     def iter(self, dummy=False):
-        # Split all elements in a list of sublists and sort them
-        entries = [x.split(b':') for x in self.data.split(b'\n')[:-1]]
-        entries.sort(key=lambda x:int(x[0]))
-        for b, c, d in entries:
-            b = int(b.decode())
-            c = c.decode()
-            d = d.decode()
-            yield b, c, d
+        # Visit blob IDs in ascending order, as the previous implementation did, instead of dict insertion order
+        for id, family_dict in sorted(self.data.items(), key=lambda item: item[0]):
+            for family, lines in family_dict.items():
+                yield id, lines, family
         if dummy:
             yield maxId, None, None
 
     def append(self, id, lines, family):
-        p = str(id) + ':' + lines + ':' + family + '\n'
-        self.data += p.encode()
+        if id not in self.data:
+            self.data[id] = {}
+        if family not in self.data[id]:
+            self.data[id][family] = []
+
+        self.data[id][family] += lines  # list concatenation: lines must be a list now (the old format took a str)
 
     def pack(self):
-        return self.data
+        return msgpack.dumps(self.data)
 
 class BsdDB:
     def __init__(self, filename, readonly, contentType, shared=False):
@@ -158,24 +194,43 @@ def __init__(self, filename, readonly, contentType, shared=False):
             self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
         self.ctype = contentType
 
-    def exists(self, key):
-        key = autoBytes(key)
+    def exists(self, key: str|bytes|int):
+        if type(key) is str:
+            key = key.encode()  # str keys are looked up as their encoded bytes
+        elif type(key) is int:
+            key = msgpack.dumps(key)  # int keys are looked up in msgpack-encoded form
+
         return self.db.exists(key)
 
-    def get(self, key):
-        key = autoBytes(key)
+    def get(self, key: str|bytes|int):
+        if type(key) is str:
+            key = key.encode()  # str keys are looked up as their encoded bytes
+        elif type(key) is int:
+            key = msgpack.dumps(key)  # int keys are looked up in msgpack-encoded form
+
         p = self.db.get(key)
-        p = self.ctype(p)
-        return p
+        if p is not None:
+            if self.ctype is None:
+                return msgpack.loads(p)  # no content type: the stored value is decoded with msgpack
+            else:
+                return self.ctype(p)  # otherwise the content type parses the stored bytes
+        else:
+            return None
 
     def get_keys(self):
         return self.db.keys()
 
-    def put(self, key, val, sync=False):
-        key = autoBytes(key)
-        val = autoBytes(val)
-        if type(val) is not bytes:
+    def put(self, key: str|bytes|int, val, sync=False):
+        if type(key) is str:
+            key = key.encode()  # str keys are stored as their encoded bytes
+        elif type(key) is int:
+            key = msgpack.dumps(key)  # int keys are stored in msgpack-encoded form
+
+        if self.ctype is None:
+            val = msgpack.dumps(val)  # no content type: the value itself is msgpack-encoded
+        else:
             val = val.pack()
+
         self.db.put(key, val)
         if sync:
             self.db.sync()
@@ -192,13 +247,13 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
 
         ro = readonly
 
-        self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
+        self.vars = BsdDB(dir + '/variables.db', ro, None, shared=shared)  # contentType is a required positional; None selects plain msgpack values
             # Key-value store of basic information
-        self.blob = BsdDB(dir + '/blobs.db', ro, lambda x: int(x.decode()), shared=shared)
+        self.blob = BsdDB(dir + '/blobs.db', ro, None, shared=shared)
             # Map hash to sequential integer serial number
-        self.hash = BsdDB(dir + '/hashes.db', ro, lambda x: x, shared=shared)
+        self.hash = BsdDB(dir + '/hashes.db', ro, None, shared=shared)
             # Map serial number back to hash
-        self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
+        self.file = BsdDB(dir + '/filenames.db', ro, None, shared=shared)
             # Map serial number to filename
         self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
         self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)

diff --git a/elixir/lib.py b/elixir/lib.py
@@ -21,6 +21,7 @@
 import sys
 import logging
 import subprocess, os
+import msgpack._cmsgpack
 
 logger = logging.getLogger(__name__)
 
@@ -189,9 +190,9 @@ def isIdent(bstr):
 
 def autoBytes(arg):
     if type(arg) is str:
-        arg = arg.encode()
+        arg = msgpack.dumps(arg)  # NOTE(review): BsdDB.exists/get/put in data.py encode str keys with key.encode(), not msgpack - confirm the two encodings are never mixed
     elif type(arg) is int:
-        arg = str(arg).encode()
+        arg = msgpack.dumps(arg)
     return arg
 
 def getDataDir():

diff --git a/static/dynamic-references.js b/static/dynamic-references.js
@@ -54,7 +54,7 @@ function generateSymbolDefinitionsHTML(symbolDefinitions, project, version) {
       result += '<ul>';
       previous_type = sd.type;
     }
-    let ln = sd.line.toString().split(',');
+    let ln = [sd.line];
     if (ln.length == 1) {
       let n = ln[0];
       result += `<li><a href="/${project}/${version}/source/${sd.path}#L${n}"><strong>${sd.path}</strong>, line ${n} <em>(as a ${sd.type})</em></a>`;
@@ -87,7 +87,7 @@ function generateSymbolReferencesHTML(symbolReferences, project, version) {
   result += '<h2>Referenced in ' + symbolReferences.length.toString() + ' files:</h2>';
   result += '<ul>';
   for (let sr of symbolReferences) {
-    let ln = sr.line.split(',');
+    let ln = sr.line;
     if (ln.length == 1) {
       let n = ln[0];
       result += `<li><a href="/${project}/${version}/source/${sr.path}#L${n}"><strong>${sr.path}</strong>, line ${n}</a>`;
@@ -117,7 +117,7 @@ function generateDocCommentsHTML(symbolDocComments, project, version) {
   result += '<h2>Documented in ' + symbolDocComments.length.toString() + ' files:</h2>';
   result += '<ul>';
   for(let sd of symbolDocComments) {
-    let ln = sd.line.split(',');
+    let ln = sd.line;
     if(ln.length == 1) {
       let n = ln[0];
       result += `<li><a href="/${project}/${version}/source/${sd.path}#L${n}"><strong>${sd.path}</strong>, line ${n}</a>`;