diff --git a/elixir/data.py b/elixir/data.py
index 5c939be0..8ded0abf 100755
--- a/elixir/data.py
+++ b/elixir/data.py
@@ -25,6 +25,8 @@
 import os.path
 import errno
 
+import msgpack
+
 deflist_regex = re.compile(b'(\d*)(\w)(\d*)(\w),?')
 deflist_macro_regex = re.compile('\dM\d+(\w)')
 
@@ -48,6 +50,36 @@
 
 defTypeD = {v: k for k, v in defTypeR.items()}
 
+defTypeToInt = {
+    'config': 0,
+    'define': 1,
+    'enum': 2,
+    'enumerator': 3,
+    'function': 4,
+    'label': 5,
+    'macro': 6,
+    'member': 7,
+    'prototype': 8,
+    'struct': 9,
+    'typedef': 10,
+    'union': 11,
+    'variable': 12,
+    'externvar': 13
+}
+
+intToDefType = {v: k for k, v in defTypeToInt.items()}
+
+familyToInt = {
+    'A': 0,
+    'B': 1,
+    'C': 2,
+    'D': 3,
+    'K': 4,
+    'M': 5,
+}
+
+intToFamily = {v: k for k, v in familyToInt.items()}
+
 ##################################################################################
 
 maxId = 999999999
@@ -56,93 +88,97 @@ class DefList:
     '''Stores associations between a blob ID, a type (e.g., "function"),
         a line number and a file family.
         Also stores in which families the ident exists for faster tests.'''
-    def __init__(self, data=b'#'):
-        self.data, self.families = data.split(b'#')
+    def __init__(self, data: bytes | None = None):
+        if data is not None:
+            parsed_data = msgpack.loads(data)
+            self.entries = parsed_data[0]
+            self.families = parsed_data[1]
+        else:
+            self.entries = []
+            self.families = ""
 
     def iter(self, dummy=False):
-        # Get all element in a list of sublists and sort them
-        entries = deflist_regex.findall(self.data)
-        entries.sort(key=lambda x:int(x[0]))
-        for id, type, line, family in entries:
-            id = int(id)
-            type = defTypeR [type.decode()]
-            line = int(line)
-            family = family.decode()
-            yield id, type, line, family
+        # Order entries by blob ID before decoding the stored int codes
+
+        self.entries.sort(key=lambda x: x[0])
+
+        for id, type, line, family in self.entries:
+            yield id, intToDefType[type], line, intToFamily[family]
+
         if dummy:
             yield maxId, None, None, None
 
-    def append(self, id, type, line, family):
+    def append(self, id: int, type: str, line: int, family: str):
+        # Types and families are stored as the small ints defined in
+        # defTypeToInt / familyToInt; unknown types are silently skipped
+
-        if type not in defTypeD:
+        if type not in defTypeToInt:
             return
-        p = str(id) + defTypeD[type] + str(line) + family
-        if self.data != b'':
-            p = ',' + p
-        self.data += p.encode()
-        self.add_family(family)
 
-    def pack(self):
-        return self.data + b'#' + self.families
+        self.entries.append((id, defTypeToInt[type], line, familyToInt[family]))
 
-    def add_family(self, family):
-        family = family.encode()
-        if not family in self.families.split(b','):
-            if self.families != b'':
-                family = b',' + family
+        if family not in self.families:
             self.families += family
 
+    def pack(self):
+        return msgpack.dumps([self.entries, self.families])
+
     def get_families(self):
-        return self.families.decode().split(',')
+        return list(self.families)
 
     def get_macros(self):
-        return deflist_macro_regex.findall(self.data.decode()) or ''
+        return [intToFamily[family] for _, typ, _, family in self.entries if typ == defTypeToInt['macro']]
 
 class PathList:
     '''Stores associations between a blob ID and a file path.
         Inserted by update.py sorted by blob ID.'''
-    def __init__(self, data=b''):
-        self.data = data
+    def __init__(self, data: bytes | None=None):
+        if data is not None:
+            # [(id, path)]
+            self.data = msgpack.loads(data)
+        else:
+            self.data = []
 
     def iter(self, dummy=False):
-        for p in self.data.split(b'\n')[:-1]:
-            id, path = p.split(b' ',maxsplit=1)
-            id = int(id)
-            path = path.decode()
+        for id, path in self.data:
             yield id, path
         if dummy:
             yield maxId, None
 
-    def append(self, id, path):
-        p = str(id).encode() + b' ' + path + b'\n'
-        self.data += p
+    def append(self, id: int, path: str):
+        self.data.append((id, path))
 
     def pack(self):
-        return self.data
+        return msgpack.dumps(self.data)
 
 class RefList:
     '''Stores a mapping from blob ID to list of lines
         and the corresponding family.'''
-    def __init__(self, data=b''):
-        self.data = data
+    def __init__(self, data=None):
+        # {blob_id: {family: [line]}}
+        if data is not None:
+            self.data = msgpack.loads(data, strict_map_key=False)
+        else:
+            self.data = {}
 
     def iter(self, dummy=False):
-        # Split all elements in a list of sublists and sort them
-        entries = [x.split(b':') for x in self.data.split(b'\n')[:-1]]
-        entries.sort(key=lambda x:int(x[0]))
-        for b, c, d in entries:
-            b = int(b.decode())
-            c = c.decode()
-            d = d.decode()
-            yield b, c, d
+        # Walk blob IDs in ascending order, then every family of that blob
+        for id in sorted(self.data):
+            for family, lines in self.data[id].items():
+                yield id, lines, family
         if dummy:
             yield maxId, None, None
 
     def append(self, id, lines, family):
-        p = str(id) + ':' + lines + ':' + family + '\n'
-        self.data += p.encode()
+        if id not in self.data:
+            self.data[id] = {}
+        if family not in self.data[id]:
+            self.data[id][family] = []
+
+        self.data[id][family] += lines
 
     def pack(self):
-        return self.data
+        return msgpack.dumps(self.data)
 
 class BsdDB:
     def __init__(self, filename, readonly, contentType, shared=False):
@@ -158,24 +194,38 @@ def __init__(self, filename, readonly, contentType, shared=False):
         self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
         self.ctype = contentType
 
-    def exists(self, key):
-        key = autoBytes(key)
-        return self.db.exists(key)
+    @staticmethod
+    def _encode_key(key: str|bytes|int) -> bytes:
+        '''Return the on-disk form of a key: str is UTF-8 encoded,
+            int is msgpack-encoded, anything else is passed through.'''
+        if isinstance(key, str):
+            return key.encode()
+        if isinstance(key, int):
+            return msgpack.dumps(key)
+        return key
+
+    def exists(self, key: str|bytes|int):
+        return self.db.exists(self._encode_key(key))
 
-    def get(self, key):
-        key = autoBytes(key)
-        p = self.db.get(key)
-        p = self.ctype(p)
-        return p
+    def get(self, key: str|bytes|int):
+        p = self.db.get(self._encode_key(key))
+        if p is None:
+            return None
+        if self.ctype is None:
+            return msgpack.loads(p)
+        return self.ctype(p)
 
     def get_keys(self):
         return self.db.keys()
 
-    def put(self, key, val, sync=False):
-        key = autoBytes(key)
-        val = autoBytes(val)
-        if type(val) is not bytes:
+    def put(self, key: str|bytes|int, val, sync=False):
+        key = self._encode_key(key)
+
+        if self.ctype is None:
+            val = msgpack.dumps(val)
+        else:
             val = val.pack()
+
         self.db.put(key, val)
         if sync:
             self.db.sync()
@@ -192,13 +242,14 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
 
         ro = readonly
 
-        self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
+        # contentType=None: values are stored as plain msgpack objects
+        self.vars = BsdDB(dir + '/variables.db', ro, None, shared=shared)
             # Key-value store of basic information
-        self.blob = BsdDB(dir + '/blobs.db', ro, lambda x: int(x.decode()), shared=shared)
+        self.blob = BsdDB(dir + '/blobs.db', ro, None, shared=shared)
             # Map hash to sequential integer serial number
-        self.hash = BsdDB(dir + '/hashes.db', ro, lambda x: x, shared=shared)
+        self.hash = BsdDB(dir + '/hashes.db', ro, None, shared=shared)
             # Map serial number back to hash
-        self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
+        self.file = BsdDB(dir + '/filenames.db', ro, None, shared=shared)
             # Map serial number to filename
         self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
         self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)
diff --git a/elixir/lib.py b/elixir/lib.py
index b7ca21be..48a08934 100755
--- a/elixir/lib.py
+++ b/elixir/lib.py
@@ -21,6 +21,7 @@
 import sys
 import logging
 import subprocess, os
+import msgpack
 
 logger = logging.getLogger(__name__)
 
@@ -189,9 +190,7 @@ def isIdent(bstr):
 
 def autoBytes(arg):
-    if type(arg) is str:
-        arg = arg.encode()
-    elif type(arg) is int:
-        arg = str(arg).encode()
+    if type(arg) in (str, int):
+        arg = msgpack.dumps(arg)
     return arg
 
 def getDataDir():
diff --git a/static/dynamic-references.js b/static/dynamic-references.js
index c2816bac..5eb62f7c 100644
--- a/static/dynamic-references.js
+++ b/static/dynamic-references.js
@@ -54,7 +54,7 @@ function generateSymbolDefinitionsHTML(symbolDefinitions, project, version) {
     result += '