Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data: Replace custom formats with msgpack #374

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 117 additions & 62 deletions elixir/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import os.path
import errno

import msgpack._cmsgpack

# Matches one packed definition entry as four groups:
# (digits)(word char)(digits)(word char), optionally followed by a comma.
# Raw literals are used so that `\d` and `\w` reach the regex engine as
# written instead of relying on Python tolerating invalid string escapes
# (a SyntaxWarning on recent Python versions).
deflist_regex = re.compile(rb'(\d*)(\w)(\d*)(\w),?')
# Captures the single word character that follows "<digit>M<digits>".
deflist_macro_regex = re.compile(r'\dM\d+(\w)')

Expand All @@ -48,6 +50,36 @@

defTypeD = {v: k for k, v in defTypeR.items()}

# Map each definition type name to its integer code.
# The code of a name is its position in the tuple below, so the order of the
# names must not change.
defTypeToInt = {
    name: code
    for code, name in enumerate((
        'config',
        'define',
        'enum',
        'enumerator',
        'function',
        'label',
        'macro',
        'member',
        'prototype',
        'struct',
        'typedef',
        'union',
        'variable',
        'externvar',
    ))
}

# Reverse lookup: integer code -> definition type name.
intToDefType = dict(zip(defTypeToInt.values(), defTypeToInt.keys()))

# Map each file family letter to its integer code.
# The code of a letter is its position in the string below, so the order of
# the letters must not change.
familyToInt = {letter: code for code, letter in enumerate('ABCDKM')}

# Reverse lookup: integer code -> file family letter.
intToFamily = dict(zip(familyToInt.values(), familyToInt.keys()))

##################################################################################

maxId = 999999999
Expand All @@ -56,93 +88,97 @@ class DefList:
'''Stores associations between a blob ID, a type (e.g., "function"),
a line number and a file family.
Also stores in which families the ident exists for faster tests.'''
def __init__(self, data=b'#'):
self.data, self.families = data.split(b'#')
def __init__(self, data: bytes | None = None):
if data is not None:
parsed_data = msgpack.loads(data)
self.entries = parsed_data[0]
self.families = parsed_data[1]
else:
self.entries = []
self.families = ""

def iter(self, dummy=False):
# Get all elements in a list of sublists and sort them
entries = deflist_regex.findall(self.data)
entries.sort(key=lambda x:int(x[0]))
for id, type, line, family in entries:
id = int(id)
type = defTypeR [type.decode()]
line = int(line)
family = family.decode()
yield id, type, line, family
# return ((id, defTypeR[type], line, family) for (id, type, line, family) in self.data)

self.entries.sort(key=lambda x: x[0])

for id, type, line, family in self.entries:
yield id, intToDefType[type], line, intToFamily[family]

if dummy:
yield maxId, None, None, None

def append(self, id, type, line, family):
def append(self, id: int, type: str, line: int, family: str):
# if family not in self.family: self.family.append(family)
# self.data.append((id, defTypeD[type], line, family))

if type not in defTypeD:
return
p = str(id) + defTypeD[type] + str(line) + family
if self.data != b'':
p = ',' + p
self.data += p.encode()
self.add_family(family)

def pack(self):
return self.data + b'#' + self.families
self.entries.append((id, defTypeToInt[type], line, familyToInt[family]))

def add_family(self, family):
family = family.encode()
if not family in self.families.split(b','):
if self.families != b'':
family = b',' + family
if family not in self.families:
self.families += family

def pack(self):
return msgpack.dumps([self.entries, self.families])

def get_families(self):
return self.families.decode().split(',')
return self.families

def get_macros(self):
return deflist_macro_regex.findall(self.data.decode()) or ''
return [intToFamily[family] for _, typ, _, family in self.entries if typ == defTypeToInt['macro']]

class PathList:
'''Stores associations between a blob ID and a file path.
Inserted by update.py sorted by blob ID.'''
def __init__(self, data=b''):
self.data = data
def __init__(self, data: bytes | None=None):
if data is not None:
# [(id, path)]
self.data = msgpack.loads(data)
else:
self.data = []

def iter(self, dummy=False):
for p in self.data.split(b'\n')[:-1]:
id, path = p.split(b' ',maxsplit=1)
id = int(id)
path = path.decode()
for id, path in self.data:
yield id, path
if dummy:
yield maxId, None

def append(self, id, path):
p = str(id).encode() + b' ' + path + b'\n'
self.data += p
def append(self, id: int, path: str):
self.data.append((id, path))

def pack(self):
return self.data
return msgpack.dumps(self.data)

class RefList:
'''Stores a mapping from blob ID to list of lines
and the corresponding family.'''
def __init__(self, data=b''):
self.data = data
def __init__(self, data=None):
# {(blob_id, family): [line]}
if data is not None:
self.data = msgpack.loads(data, strict_map_key=False)
else:
self.data = {}

def iter(self, dummy=False):
# Split all elements in a list of sublists and sort them
entries = [x.split(b':') for x in self.data.split(b'\n')[:-1]]
entries.sort(key=lambda x:int(x[0]))
for b, c, d in entries:
b = int(b.decode())
c = c.decode()
d = d.decode()
yield b, c, d
for id, family_dict in self.data.items():
for family, lines in family_dict.items():
yield id, lines, family
if dummy:
yield maxId, None, None

def append(self, id, lines, family):
p = str(id) + ':' + lines + ':' + family + '\n'
self.data += p.encode()
if id not in self.data:
self.data[id] = {}
if family not in self.data[id]:
self.data[id][family] = []

self.data[id][family] += lines

def pack(self):
return self.data
return msgpack.dumps(self.data)

class BsdDB:
def __init__(self, filename, readonly, contentType, shared=False):
Expand All @@ -158,24 +194,43 @@ def __init__(self, filename, readonly, contentType, shared=False):
self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
self.ctype = contentType

def exists(self, key):
key = autoBytes(key)
def exists(self, key: str|bytes|int):
if type(key) is str:
key = key.encode()
elif type(key) is int:
key = msgpack.dumps(key)

return self.db.exists(key)

def get(self, key):
key = autoBytes(key)
def get(self, key: str|bytes|int):
if type(key) is str:
key = key.encode()
elif type(key) is int:
key = msgpack.dumps(key)

p = self.db.get(key)
p = self.ctype(p)
return p
if p is not None:
if self.ctype is None:
return msgpack.loads(p)
else:
return self.ctype(p)
else:
return None

def get_keys(self):
return self.db.keys()

def put(self, key, val, sync=False):
key = autoBytes(key)
val = autoBytes(val)
if type(val) is not bytes:
def put(self, key: str|bytes|int, val, sync=False):
if type(key) is str:
key = key.encode()
elif type(key) is int:
key = msgpack.dumps(key)

if self.ctype is None:
val = msgpack.dumps(val)
else:
val = val.pack()

self.db.put(key, val)
if sync:
self.db.sync()
Expand All @@ -192,13 +247,13 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):

ro = readonly

self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
self.vars = BsdDB(dir + '/variables.db', ro, shared=shared)
# Key-value store of basic information
self.blob = BsdDB(dir + '/blobs.db', ro, lambda x: int(x.decode()), shared=shared)
self.blob = BsdDB(dir + '/blobs.db', ro, shared=shared)
# Map hash to sequential integer serial number
self.hash = BsdDB(dir + '/hashes.db', ro, lambda x: x, shared=shared)
self.hash = BsdDB(dir + '/hashes.db', ro, shared=shared)
# Map serial number back to hash
self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
self.file = BsdDB(dir + '/filenames.db', ro, shared=shared)
# Map serial number to filename
self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)
Expand Down
5 changes: 3 additions & 2 deletions elixir/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import logging
import subprocess, os
import msgpack._cmsgpack

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -189,9 +190,9 @@ def isIdent(bstr):

def autoBytes(arg):
if type(arg) is str:
arg = arg.encode()
arg = msgpack.dumps(arg)
elif type(arg) is int:
arg = str(arg).encode()
arg = msgpack.dumps(arg)
return arg

def getDataDir():
Expand Down
6 changes: 3 additions & 3 deletions static/dynamic-references.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function generateSymbolDefinitionsHTML(symbolDefinitions, project, version) {
result += '<ul>';
previous_type = sd.type;
}
let ln = sd.line.toString().split(',');
let ln = [sd.line];
if (ln.length == 1) {
let n = ln[0];
result += `<li><a href="/${project}/${version}/source/${sd.path}#L${n}"><strong>${sd.path}</strong>, line ${n} <em>(as a ${sd.type})</em></a>`;
Expand Down Expand Up @@ -87,7 +87,7 @@ function generateSymbolReferencesHTML(symbolReferences, project, version) {
result += '<h2>Referenced in ' + symbolReferences.length.toString() + ' files:</h2>';
result += '<ul>';
for (let sr of symbolReferences) {
let ln = sr.line.split(',');
let ln = sr.line;
if (ln.length == 1) {
let n = ln[0];
result += `<li><a href="/${project}/${version}/source/${sr.path}#L${n}"><strong>${sr.path}</strong>, line ${n}</a>`;
Expand Down Expand Up @@ -117,7 +117,7 @@ function generateDocCommentsHTML(symbolDocComments, project, version) {
result += '<h2>Documented in ' + symbolDocComments.length.toString() + ' files:</h2>';
result += '<ul>';
for(let sd of symbolDocComments) {
let ln = sd.line.split(',');
let ln = sd.line;
if(ln.length == 1) {
let n = ln[0];
result += `<li><a href="/${project}/${version}/source/${sd.path}#L${n}"><strong>${sd.path}</strong>, line ${n}</a>`;
Expand Down
Loading