Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New db to speed up full text queries and library updates #114

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/update_components.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:

rm -f web/build/data/cache.z*
zip -s 50m web/build/data/cache.zip cache.sqlite3

- name: Tar artifact # Artifacts are case-insensitive; this is a workaround
run: tar -czf web_build.tar.gz web/build/
- name: Upload artifact
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
.idea
*.zip
*.z*
*.tar
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
138 changes: 91 additions & 47 deletions jlcparts/datatables.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,23 @@
from jlcparts.common import sha256file
from jlcparts import attributes, descriptionAttributes

def saveJson(object, filename, hash=False, pretty=False, compress=False):
    """
    Serialize object as JSON (keys sorted) into filename.

    With pretty the output is indented by 4 spaces, otherwise the most compact
    separators are used. With compress the file is written through gzip.
    With hash the SHA-256 of the written file is stored next to it in
    "<filename>.sha256" and returned; otherwise nothing is returned.
    """
    if pretty:
        dumpOptions = {"indent": 4}
    else:
        dumpOptions = {"separators": (',', ':')}

    opener = gzip.open if compress else open
    with opener(filename, "wt", encoding="utf-8") as out:
        json.dump(object, out, sort_keys=True, **dumpOptions)

    if not hash:
        return None

    # The digest is computed over the file as stored on disk, i.e. over the
    # compressed bytes when compress is set.
    with open(filename + ".sha256", "w") as hashFile:
        digest = sha256file(filename)
        hashFile.write(digest)
    return digest
import tarfile

from time import time

def saveDatabaseFile(database, outpath, outfilename):
    """
    Pack database into a single uncompressed tar archive outpath/outfilename.

    database maps a table name to an iterable of JSON-serializable entries.
    Every table is written as "<name>.jsonlines.gz" (one compact JSON document
    per line, gzip-compressed) and added to the archive under that name. The
    intermediate .gz files are removed afterwards, even when writing or
    archiving fails, so that no partial output is left in outpath.
    """
    members = []  # (name inside the archive, path of the intermediate file)
    try:
        for key, entries in database.items():
            name = key + ".jsonlines.gz"
            path = os.path.join(outpath, name)
            # Register before writing so a failed write is cleaned up as well
            members.append((name, path))
            with gzip.open(path, "wt", encoding="utf-8") as f:
                for entry in entries:
                    f.write(json.dumps(entry, separators=(',', ':'), sort_keys=False))
                    f.write("\n")

        # The members are already gzipped, so the tar itself stays uncompressed
        with tarfile.open(os.path.join(outpath, outfilename), 'w') as tar:
            for name, path in members:
                tar.add(path, arcname=name)
    finally:
        for _, path in members:
            if os.path.exists(path):
                os.unlink(path)

def weakUpdateParameters(attrs, newParameters):
for attr, value in newParameters.items():
Expand Down Expand Up @@ -260,6 +265,8 @@ def extractComponent(component, schema):
elif schItem == "url":
url = component.get("extra", {}).get("url", None)
propertyList.append(trimLcscUrl(url, component["lcsc"]))
elif schItem == "stock":
propertyList.append(component["stock"])
elif schItem in component:
item = component[schItem]
if isinstance(item, str):
Expand All @@ -273,15 +280,12 @@ def extractComponent(component, schema):

def buildDatatable(components):
schema = ["lcsc", "mfr", "joints", "description",
"datasheet", "price", "img", "url", "attributes"]
"datasheet", "price", "img", "url", "attributes", "stock"]
return {
"schema": schema,
"components": [extractComponent(x, schema) for x in components]
}

def buildStocktable(components):
    """
    Map the LCSC number of every component to its stock value.
    """
    stockByLcsc = {}
    for part in components:
        stockByLcsc[part["lcsc"]] = part["stock"]
    return stockByLcsc

def clearDir(directory):
"""
Delete everything inside a directory
Expand All @@ -293,6 +297,28 @@ def clearDir(directory):
elif os.path.isdir(file_path):
shutil.rmtree(file_path)

def schemaToLookup(schema):
    """
    Turn a schema (sequence of column names) into a dict mapping each name to
    its position. If a name occurs more than once, its last position wins.
    """
    return {name: position for position, name in enumerate(schema)}

def updateLut(lut, item):
    """
    Return the index of item in lut, registering it first if it is new.

    Items are keyed by their canonical JSON text (compact separators, sorted
    keys), so values that serialize identically share one index. New items get
    the next free index, i.e. the current size of lut. The lut is modified in
    place.
    """
    key = json.dumps(item, separators=(',', ':'), sort_keys=True)
    # len(lut) is evaluated before the insertion, so a new key receives the
    # next 0-based index; an existing key keeps the index it already has.
    return lut.setdefault(key, len(lut))

def lutToArray(lutMap):
    """
    Invert a lookup table: the returned list holds each key of lutMap at the
    position given by its value.

    The values must be exactly the integers 0 .. len(lutMap) - 1, each used
    once. Anything else raises ValueError instead of silently producing an
    array with missing or misplaced entries.
    """
    size = len(lutMap)
    arr = [None] * size
    taken = [False] * size
    for key, index in lutMap.items():
        # Negative indices would wrap around and duplicates would overwrite
        # each other, so both are rejected explicitly.
        if not isinstance(index, int) or not 0 <= index < size or taken[index]:
            raise ValueError(
                f"LUT index {index!r} for key {key!r} is not a unique integer in range(0, {size})")
        taken[index] = True
        arr[index] = key
    return arr


@dataclasses.dataclass
class MapCategoryParams:
Expand All @@ -316,26 +342,10 @@ def _map_category(val: MapCategoryParams):
components = lib.getCategoryComponents(val.catName, val.subcatName, stockNewerThan=val.ignoreoldstock)
if not components:
return None

filebase = val.catName + val.subcatName
filebase = filebase.replace("&", "and").replace("/", "aka")
filebase = re.sub('[^A-Za-z0-9]', '_', filebase)

dataTable = buildDatatable(components)
dataTable = buildDatatable(components)
dataTable.update({"category": val.catName, "subcategory": val.subcatName})
dataHash = saveJson(dataTable, os.path.join(val.outdir, f"{filebase}.json.gz"),
hash=True, compress=True)

stockTable = buildStocktable(components)
stockHash = saveJson(stockTable, os.path.join(val.outdir, f"{filebase}.stock.json"), hash=True)

return {
"catName": val.catName,
"subcatName": val.subcatName,
"sourcename": filebase,
"datahash": dataHash,
"stockhash": stockHash
}
return dataTable

@click.command()
@click.argument("library", type=click.Path(dir_okay=False))
Expand All @@ -348,6 +358,8 @@ def buildtables(library, outdir, ignoreoldstock, jobs):
"""
Build datatables out of the LIBRARY and save them in OUTDIR
"""
t0 = time()

lib = PartLibraryDb(library)
Path(outdir).mkdir(parents=True, exist_ok=True)
clearDir(outdir)
Expand All @@ -367,18 +379,50 @@ def buildtables(library, outdir, ignoreoldstock, jobs):
for i, result in enumerate(pool.imap_unordered(_map_category, params)):
if result is None:
continue
catName, subcatName = result["catName"], result["subcatName"]
catName = result["category"] #.lower()
subcatName = result["subcategory"] #.lower()
sourceName = f"{catName}__x__{subcatName}"
print(f"{((i) / total * 100):.2f} % {catName}: {subcatName}")
if catName not in categoryIndex:
categoryIndex[catName] = {}
assert subcatName not in categoryIndex[catName]
categoryIndex[catName][subcatName] = {
"sourcename": result["sourcename"],
"datahash": result["datahash"],
"stockhash": result["stockhash"]
}
index = {
"categories": categoryIndex,
"created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat()
if sourceName not in categoryIndex:
categoryIndex[sourceName] = result
else:
categoryIndex[sourceName]["components"] += result["components"] # combine for categories that are only different because of case

t1 = time()
# db holds the data we're putting into our database file
db = {
"subcategories": [schemaToLookup(['subcategory', 'category', 'subcategoryIdx'])],
"components": [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])],
"attributes-lut": {}
}
saveJson(index, os.path.join(outdir, "index.json"), hash=True)

# fill database
s = None # schema lookup
subcatIndex = 0
for sourceName, subcatEntry in categoryIndex.items():
if s is None:
s = schemaToLookup(subcatEntry["schema"]) # all schema will be the same

subcatIndex += 1
db["subcategories"] += [[subcatEntry["subcategory"], subcatEntry["category"], subcatIndex]]

for comp in subcatEntry["components"]:
db["components"] += [[
comp[s["lcsc"]],
comp[s["mfr"]],
comp[s["description"]],
[updateLut(db["attributes-lut"], [attrName, value]) for attrName,value in comp[s["attributes"]].items()],
comp[s["stock"]],
subcatIndex,
comp[s["joints"]],
comp[s["datasheet"]],
comp[s["price"]],
comp[s["img"]],
comp[s["url"]]
]]

# invert the lut
db["attributes-lut"] = [json.loads(str) for str in lutToArray(db["attributes-lut"])]
saveDatabaseFile(db, outdir, "all.jsonlines.tar")

print(f"Table extraction took {(t1 - t0)}, reformat into one file took {time() - t1}")
4 changes: 4 additions & 0 deletions web/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
/.pnp
.pnp.js

*.json
*.gz
*.jsonlines

# testing
/coverage

Expand Down
11 changes: 11 additions & 0 deletions web/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"@testing-library/user-event": "^7.2.1",
"dexie": "^3.0.2",
"immer": "^7.0.8",
"js-untar": "^2.0.0",
"pako": "^2.0.4",
"react": "^16.13.1",
"react-copy-to-clipboard": "^5.0.2",
Expand Down
17 changes: 10 additions & 7 deletions web/src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ import {
NavLink
} from "react-router-dom";


import { library } from '@fortawesome/fontawesome-svg-core'
import { fas } from '@fortawesome/free-solid-svg-icons'
import { far } from '@fortawesome/free-regular-svg-icons'
import { fab } from '@fortawesome/free-brands-svg-icons'

import './main.css';
import { updateComponentLibrary, checkForComponentLibraryUpdate, db } from './db'
import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray } from './db'
import { ComponentOverview } from './componentTable'
import { History } from './history'

Expand Down Expand Up @@ -79,8 +80,8 @@ class FirstTimeNote extends React.Component {
}

componentDidMount() {
db.components.count().then(x => {
this.setState({componentCount: x});
unpackLinesAsArray('components').then(components => {
this.setState({componentCount: Math.max(0, components.length - 1)}); // don't count the schema entry
})
}

Expand Down Expand Up @@ -110,9 +111,11 @@ class NewComponentFormatWarning extends React.Component {
}

componentDidMount() {
db.components.toCollection().first().then(x => {
if (x !== undefined && typeof x.attributes[Object.keys(x.attributes)[0]] !== 'object')
this.setState({newComponentFormat: false});
// I don't know if newComponentFormat will work like this
unpackLinesAsArray('subcategories').then(cats => {
if (cats.size > 1) {
this.setState({newComponentFormat: false});
}
});
}

Expand Down Expand Up @@ -142,7 +145,7 @@ class UpdateBar extends React.Component {
this.setState({updateAvailable});
});
db.settings.get("lastUpdate").then(lastUpdate => {
this.setState({lastUpdate});
this.setState({lastUpdate: lastUpdate?.value});
})
};

Expand Down
Loading