From 1e2fc326cbd794618da74705454c69a736b7b68c Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 15:44:40 +1000 Subject: [PATCH 01/13] Allow fetch requests to read from a single downloaded tar.gz file --- web/package-lock.json | 11 +++++++ web/package.json | 1 + web/src/db.js | 75 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/web/package-lock.json b/web/package-lock.json index 27d9b581c1f..287e05a4901 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -19,6 +19,7 @@ "@testing-library/user-event": "^7.2.1", "dexie": "^3.0.2", "immer": "^7.0.8", + "js-untar": "^2.0.0", "pako": "^2.0.4", "react": "^16.13.1", "react-copy-to-clipboard": "^5.0.2", @@ -13865,6 +13866,11 @@ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" }, + "node_modules/js-untar": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/js-untar/-/js-untar-2.0.0.tgz", + "integrity": "sha512-7CsDLrYQMbLxDt2zl9uKaPZSdmJMvGGQ7wo9hoB3J+z/VcO2w63bXFgHVnjF1+S9wD3zAu8FBVj7EYWjTQ3Z7g==" + }, "node_modules/js-yaml": { "version": "3.14.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.0.tgz", @@ -29779,6 +29785,11 @@ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" }, + "js-untar": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/js-untar/-/js-untar-2.0.0.tgz", + "integrity": "sha512-7CsDLrYQMbLxDt2zl9uKaPZSdmJMvGGQ7wo9hoB3J+z/VcO2w63bXFgHVnjF1+S9wD3zAu8FBVj7EYWjTQ3Z7g==" + }, "js-yaml": { "version": "3.14.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.0.tgz", diff --git a/web/package.json b/web/package.json index c9a6c225360..4b95398f6dc 100644 --- a/web/package.json +++ b/web/package.json @@ -14,6 +14,7 @@ "@testing-library/user-event": "^7.2.1", "dexie": "^3.0.2", "immer": "^7.0.8", + "js-untar": "^2.0.0", "pako": "^2.0.4", "react": "^16.13.1", "react-copy-to-clipboard": "^5.0.2", diff --git a/web/src/db.js b/web/src/db.js index 9881bf56e04..b86fff81c3e 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -1,5 +1,6 @@ import Dexie from 'dexie'; import * as pako from 'pako'; +import untar from "js-untar"; if (!window.indexedDB) { alert("This page requires IndexedDB to work.\n" + @@ -101,8 +102,76 @@ export async function checkForComponentLibraryUpdate() { return updateAvailable; } +// contains data from all-data.tar.gz +let allData = { + filesPromise: null, + fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure + return new Promise(async (resolve, reject) => { + if (this.filesPromise === null) { + this.filesPromise = new Promise(async (resolve, reject) => { + try { + const resp = await fetch(`${SOURCE_PATH}/all-data.tar.gz`); + if (resp.status === 200) { + const compressedData = await resp.arrayBuffer(); + const data = pako.ungzip(compressedData); + const files = await untar(data.buffer); + const fileData = {}; + for (const file of files) { + fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; + } + //console.log('Got all data', fileData); + resolve(fileData); + } else { + //reject('Bad fetch of all-data'); + resolve(null); + } + } catch(ex) { + //reject(ex); + resolve(null); + + console.log('Failed to fetch all-data.tar.gz', ex); + } + }); + } + + const files = await 
this.filesPromise; + + if (files) { + const fileData = files[path.toLowerCase()]; + if (fileData){ + if (expectJson) { + if (path.slice(-3) === '.gz') { + resolve(JSON.parse(pako.ungzip(fileData, {to: 'string'}))); + } else { + const decoder = new TextDecoder(); + resolve(JSON.parse(decoder.decode(fileData))); + } + } else { + const decoder = new TextDecoder(); + resolve(decoder.decode(fileData)); + } + } else { + //reject(`${path} not found`); + resolve(null); + } + } else { + //reject('All data not available'); + resolve(null); + } + }); + } +}; + // Fetch a JSON. If error occures, export async function fetchJson(path, errorIntro) { + if (path.indexOf('/index.json') < 0) { + // try from all data combined file first + const data = await allData.fetch(path, true); + if (data) { + return data; + } + } + let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); @@ -131,6 +200,12 @@ export async function fetchJson(path, errorIntro) { } async function fetchText(path, errorIntro) { + // try from all data combined file first + const data = await allData.fetch(path, false); + if (data) { + return data; + } + let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); From 415fae89edae2fe46936c6d998d08e3fdef2736d Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 16:44:07 +1000 Subject: [PATCH 02/13] Add all-data.tar.gz creation to the buildtables function --- jlcparts/datatables.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index f3fe8cc57e2..3fb55210afb 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,6 +13,9 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes +import tarfile +import glob + def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open with openFn(filename, "wt", encoding="utf-8") as f: @@ -25,6 +28,14 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): hash = sha256file(filename) f.write(hash) return hash + +def saveAllDataArchive(path): + patterns = ['*.json', '*.json.gz', '*.sha256'] + + with tarfile.open(os.path.join(path, 'all-data.tar.gz'), 'w:gz') as tar: + for pattern in patterns: + for file in glob.glob(os.path.join(path, pattern)): + tar.add(file, arcname=os.path.relpath(file, start=path)) def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): @@ -382,3 +393,4 @@ def buildtables(library, outdir, ignoreoldstock, jobs): "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() } saveJson(index, os.path.join(outdir, "index.json"), hash=True) + saveAllDataArchive(outdir) \ No newline at end of file From 1fd9ada118ade0b994e638718f61a00970557235 Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 19:40:37 +1000 Subject: [PATCH 03/13] Split combined data file into two parts --- jlcparts/datatables.py | 13 +++++++++---- web/src/db.js | 35 +++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index 3fb55210afb..ea7931db174 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -15,6 +15,7 @@ import tarfile import glob +import random def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open @@ -32,10 +33,14 @@ def saveJson(object, filename, hash=False, 
pretty=False, compress=False):
 def saveAllDataArchive(path):
     patterns = ['*.json', '*.json.gz', '*.sha256']
 
-    with tarfile.open(os.path.join(path, 'all-data.tar.gz'), 'w:gz') as tar:
-        for pattern in patterns:
-            for file in glob.glob(os.path.join(path, pattern)):
-                tar.add(file, arcname=os.path.relpath(file, start=path))
+    with tarfile.open(os.path.join(path, 'all-data-1.tar.gz'), 'w:gz') as tar1:
+        with tarfile.open(os.path.join(path, 'all-data-2.tar.gz'), 'w:gz') as tar2:
+            for pattern in patterns:
+                for file in glob.glob(os.path.join(path, pattern)):
+                    if random.randint(1, 2) == 1:
+                        tar1.add(file, arcname=os.path.relpath(file, start=path))
+                    else:
+                        tar2.add(file, arcname=os.path.relpath(file, start=path))
 
 def weakUpdateParameters(attrs, newParameters):
     for attr, value in newParameters.items():
diff --git a/web/src/db.js b/web/src/db.js
index b86fff81c3e..889ff9671bb 100644
--- a/web/src/db.js
+++ b/web/src/db.js
@@ -105,24 +105,35 @@ export async function checkForComponentLibraryUpdate() {
 // contains data from all-data.tar.gz
 let allData = {
     filesPromise: null,
+    fetchSingle: async function(url) { // fetchs a single chunk of the combined files
+        const resp = await fetch(url);
+        if (resp.status === 200) {
+            const compressedData = await resp.arrayBuffer();
+            const data = pako.ungzip(compressedData);
+            const files = await untar(data.buffer);
+            const fileData = {};
+            for (const file of files) {
+                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
+            }
+            return fileData;
+        } else {
+            return {}; // failed to download/unpack
+        }
+    },
     fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure
         return new Promise(async (resolve, reject) => {
             if (this.filesPromise === null) {
                 this.filesPromise = new Promise(async (resolve, reject) => {
                     try {
-                        const resp = await fetch(`${SOURCE_PATH}/all-data.tar.gz`);
-                        if (resp.status === 200) {
-                            const compressedData = await resp.arrayBuffer();
-                            const data = pako.ungzip(compressedData);
-                            const files = await untar(data.buffer);
-                            const fileData = {};
-                            for (const file of files) {
-                                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
-                            }
-                            //console.log('Got all data', fileData);
-                            resolve(fileData);
+                        const ChunkCount = 2;
+                        let data = {};
+                        for (let i = 0; i < ChunkCount; i++) {
+                            Object.assign(data, await this.fetchSingle(`${SOURCE_PATH}/all-data-${i + 1}.tar.gz`));
+                        }
+
+                        if (Object.keys(data).length > 0) {
+                            resolve(data);
                         } else {
-                            //reject('Bad fetch of all-data');
                             resolve(null);
                         }
                     } catch(ex) {
From 16c4251ad7a5a28f823a8688954ce3e32531f0da Mon Sep 17 00:00:00 2001
From: doug
Date: Sat, 10 Feb 2024 19:52:49 +1000
Subject: [PATCH 04/13] Allow using downloaded chunks of the combined file, even if some chunks are missing

---
 web/src/db.js | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/web/src/db.js b/web/src/db.js
index 889ff9671bb..08c8d1a4d82 100644
--- a/web/src/db.js
+++ b/web/src/db.js
@@ -106,17 +106,22 @@ export async function checkForComponentLibraryUpdate() {
 let allData = {
     filesPromise: null,
     fetchSingle: async function(url) { // fetchs a single chunk of the combined files
-        const resp = await fetch(url);
-        if (resp.status === 200) {
-            const compressedData = await resp.arrayBuffer();
-            const data = pako.ungzip(compressedData);
-            const files = await untar(data.buffer);
-            const fileData = {};
-            for (const file of files) {
-                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
+        try {
+            const resp = 
await fetch(url); + if (resp.status === 200) { + const compressedData = await resp.arrayBuffer(); + const data = pako.ungzip(compressedData); + const files = await untar(data.buffer); + const fileData = {}; + for (const file of files) { + fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; + } + return fileData; + } else { + return {}; // failed to download/unpack } - return fileData; - } else { + } catch (ex) { + console.log('Failed to fetch all-data.tar.gz', ex); return {}; // failed to download/unpack } }, @@ -137,10 +142,7 @@ let allData = { resolve(null); } } catch(ex) { - //reject(ex); resolve(null); - - console.log('Failed to fetch all-data.tar.gz', ex); } }); } From 0b2a27ee012fef59e0b4ba51f66962a702bd8bea Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 18:14:37 +1000 Subject: [PATCH 05/13] Squash database into 3 gz files. Single file download. Use gz files for queries. --- .gitignore | 2 + web/.gitignore | 4 ++ web/processData.js | 130 ++++++++++++++++++++++++++++++++++++ web/src/app.js | 15 +++-- web/src/componentTable.js | 105 +++++++++++++++++++++++------ web/src/db.js | 136 +++++++++++++++++++++++++++++++++++++- web/src/history.js | 20 ++++-- 7 files changed, 379 insertions(+), 33 deletions(-) create mode 100644 web/processData.js diff --git a/.gitignore b/.gitignore index e2ab415f904..de9138b67b2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .idea *.zip *.z* +*.tar +.vscode/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/web/.gitignore b/web/.gitignore index c1fe7dfdf2e..661b7cea7b9 100644 --- a/web/.gitignore +++ b/web/.gitignore @@ -5,6 +5,10 @@ /.pnp .pnp.js +*.json +*.gz +*.jsonlines + # testing /coverage diff --git a/web/processData.js b/web/processData.js new file mode 100644 index 00000000000..319c19eb0d0 --- /dev/null +++ b/web/processData.js @@ -0,0 +1,130 @@ + +const fs = require('fs'); +const path = require('path'); +const zlib = require("zlib"); +const process = require('process'); +const { execSync } = require('child_process'); + +const directoryPath = 'public/data'; + +try{process.chdir('web');}catch(ex){} // debug path is 'web/..' 
+ +function foreachJsonFile(directory, processFunc) { + try { + // Read the directory + const filenames = fs.readdirSync(directory); + + // Filter .json files + const jsonFiles = filenames.filter(file => /(\.stock\.json$|\.json\.gz$)/.test(file)); + + // Iterate through .json files + for (const file of jsonFiles) { + const filePath = path.join(directory, file); + + // Read and process the JSON file + const getJson = () => { + let data = fs.readFileSync(filePath); + if (/\.gz$/.test(file)) { // decompress if required + data = zlib.gunzipSync(data); + } + + const json = JSON.parse(data); + return json; + }; + + processFunc(file, getJson); + + //break; + } + } catch (error) { + console.error('Error processing JSON files:', error); + } +} + +// Call the function +let database = { + subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], + components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], + + attributesLut: [], // this is a list of unique attributes; position is used as the attribute index + stock: {} // this is just a temporary lookup to help generate the components table +}; + +// adds the obj to the lut, and returns the index +function updateLut(lut, obj) { + return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; +} + +// inverts the lut so that the object becomes an array, with the key being the value (values must be 0-based, numeric, and contiguous) +function lutToArray(lut) { + return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? JSON.parse(x[0]) : null); +} + +function schemaToLookup(arr) { + let lut = {}; + arr.forEach((key, i) => lut[key] = i); + return lut; +} + +const startTime = new Date().getTime(); + +// populate the stock lookup +foreachJsonFile(directoryPath, (file, getObj) => { + if (file.includes('.stock.json')) { + Object.assign(database.stock, getObj()); + } +}); + +let processedCount = 0; +const totalCount = fs.readdirSync(directoryPath).filter(file => /\.json\.gz$/.test(file)).length; + +foreachJsonFile(directoryPath, (file, getObj) => { + if (file.includes('.stock.json')) { + return; + } + + const obj = getObj(); + + // subcategories schema: ['subcategory', 'category', 'sourcename'] + database.subcategories.push([obj.subcategory, obj.category, file.split('.')[0]]); + const subcategoryIdx = database.subcategories.length - 1; + + try { + //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] + // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] + const s = schemaToLookup(obj.schema); + obj.components.forEach(comp => { + let entry = [ + comp[s.lcsc], + comp[s.mfr], + comp[s.description], + Object.entries(comp[s.attributes]).map(attr => updateLut(database.attributesLut, attr)), + database.stock[comp[s.lcsc]], + subcategoryIdx, + comp[s.joints], + comp[s.datasheet], + comp[s.price], + comp[s.img], + comp[s.url] + ]; + database.components.push(entry); + }); + + console.log(`Processed ${++processedCount} / ${totalCount} (${Math.round(processedCount / totalCount * 100)}%)`, file); + } catch (ex) { + console.log(`Failed on ${file}`, ex); + } +}); + +console.log('Writing jsonlines files'); +function writeOutputFile(name, str) { + fs.writeFileSync(name, str); + fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); +} +writeOutputFile('subcategories.jsonlines', database.subcategories.map(d => 
JSON.stringify(d)).join('\n')); +writeOutputFile('components.jsonlines', database.components.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile('attributes-lut.jsonlines', lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); + +execSync('tar -cf all.jsonlines.tar *.jsonlines.gz'); + +console.log(`Processing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); \ No newline at end of file diff --git a/web/src/app.js b/web/src/app.js index aa97aab5c86..4cbdc0cc411 100644 --- a/web/src/app.js +++ b/web/src/app.js @@ -6,13 +6,14 @@ import { NavLink } from "react-router-dom"; + import { library } from '@fortawesome/fontawesome-svg-core' import { fas } from '@fortawesome/free-solid-svg-icons' import { far } from '@fortawesome/free-regular-svg-icons' import { fab } from '@fortawesome/free-brands-svg-icons' import './main.css'; -import { updateComponentLibrary, checkForComponentLibraryUpdate, db } from './db' +import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray, haveComponents } from './db' import { ComponentOverview } from './componentTable' import { History } from './history' @@ -79,8 +80,8 @@ class FirstTimeNote extends React.Component { } componentDidMount() { - db.components.count().then(x => { - this.setState({componentCount: x}); + unpackLinesAsArray('components').then(components => { + this.setState({componentCount: components.length - 1}); // don't count the schema entry }) } @@ -110,9 +111,11 @@ class NewComponentFormatWarning extends React.Component { } componentDidMount() { - db.components.toCollection().first().then(x => { - if (x !== undefined && typeof x.attributes[Object.keys(x.attributes)[0]] !== 'object') - this.setState({newComponentFormat: false}); + // I don't know if newComponentFormat will work like this + unpackLinesAsArray('subcategories').then(cats => { + if (cats.size > 1) { + this.setState({newComponentFormat: false}); + } }); } diff --git a/web/src/componentTable.js b/web/src/componentTable.js index 5de2d158882..084f0f78750 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -1,4 +1,4 @@ -import { db } from "./db"; +import { unpackAndProcessLines, unpackLinesAsArray} from "./db"; import React from "react"; import { produce, enableMapSet } from "immer"; import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' @@ -161,14 +161,27 @@ export class ComponentOverview extends React.Component { } componentDidMount() { - db.categories.toArray().then( categories => { + (async () => { + // generate categories array + let subCats = (await unpackLinesAsArray('subcategories')).map(str => JSON.parse(str)); + + let schema = subCats[0]; // first entry is always the schema lookup + let cats = subCats.filter((sc, i) => i > 0).map((sc, id) => ({ + id: id + 1, + category: sc[schema.category], + subcategory: sc[schema.subcategory], + sourcename: sc[schema.sourcename], + stockhash: 0, // not needed + datahash: 0 // not needed + })); + this.setState({ - categories: this.prepareCategories(categories), - rawCategories: categories + categories: this.prepareCategories(cats), + rawCategories: cats }); - }) + })(); } - + prepareCategories(sourceCategories) { let categories = {}; for (const category of sourceCategories) { @@ -640,32 +653,84 @@ class CategoryFilter extends React.Component { // full-text search async components() { this.state.abort(); - let query; + + let categoryFilter = (cat) => true; + if (this.state.allCategories) { if (this.state.searchString.length < 3) { // 
prevent high ram usage return []; } - query = db.components; } - else - query = db.components.where("category").anyOf(this.collectActiveCategories()); - + else { + const catIds = this.collectActiveCategories(); + const catIdLookup = new Set(catIds); + categoryFilter = (catid) => catIdLookup.has(catid); + } + + let results = []; + let words = []; if (this.state.searchString.length !== 0) { - const words = this.state.searchString.split(/\s+/) + words = this.state.searchString.split(/\s+/) .filter(x => x.length > 0) .map(x => x.toLocaleLowerCase()); - if (words.length > 0) { - query = query.filter(component => { - const text = componentText(component); - return words.every(word => text.includes(word)); - }); - } } let aborted = false; this.setState({abort: () => aborted = true}); - const components = await query.until(() => aborted).toArray(); - return aborted ? null : components; + + let schema; + await unpackAndProcessLines('components', (comp, idx) => { + comp = JSON.parse(comp); + + if (idx === 0) { // first line is always schema lookup + schema = comp; + } else { + if (categoryFilter(comp[schema.subcategoryIdx])) { + let component = { + lcsc: comp[schema.lcsc], + mfr: comp[schema.mfr], + description: comp[schema.description], + attrsIdx: comp[schema.attrsIdx], + stock: comp[schema.stock], + category: comp[schema.subcategoryIdx], + componentIdx: idx, + joints: comp[schema.joints], + datasheet: comp[schema.datasheet], + price: comp[schema.price], + img: comp[schema.img], + url: comp[schema.url] + }; + + if (words.length > 0) { + const text = componentText(component); + if(words.every(word => text.includes(word))) { + results.push(component); + } + } else { + results.push(component); + } + } + } + }, () => aborted); + + if (aborted) { + return null; + } + + if (results.length > 0) { + let resultLookup = {}; + results.forEach(res => resultLookup[res.componentIdx] = res); + + const attributesLut = await unpackLinesAsArray('attributes-lut'); + results.forEach(res => { + res.attributes = {}; + res.attrsIdx.map(idx => JSON.parse(attributesLut[idx])).forEach(entry => { + res.attributes[entry[0]] = entry[1]; + }); + }); + } + + return results; } handleCategoryChange = (category, value) => { diff --git a/web/src/db.js b/web/src/db.js index 08c8d1a4d82..0ac34f8db0f 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -12,10 +12,9 @@ return await navigator.storage?.persist?.(); } export const db = new Dexie('jlcparts'); -db.version(1).stores({ +db.version(2).stores({ settings: 'key', - components: 'lcsc, category, mfr, *indexWords', - categories: 'id++,[category+subcategory], subcategory, category' + jsonlines: 'name' }); function extractCategoryKey(category) { @@ -24,10 +23,141 @@ function extractCategoryKey(category) { const SOURCE_PATH = "data"; +let jsonlines = {}; // copy of the database in memory so we only access the database once (doesn't really matter - it would be pretty fast anyway) +async function getJsonlines() { + if (Object.keys(jsonlines).length === 0) { + (await db.jsonlines.toArray()).forEach(obj => { + jsonlines[obj.name] = obj.compressedData + }); + } + return jsonlines; +} + +export async function haveComponents() { + await getJsonlines(); + return jsonlines['components']?.size; // TODO: check if this should be .length +} + +export async function unpackLinesAsArray(name) { + let arr = []; + await unpackAndProcessLines(name, (val, idx) => arr.push(val)); + return arr; +} + +async function yieldExec() { + return new Promise((resolve, reject) => { + setTimeout(() => resolve(), 0); + 
}); +} + +export async function unpackAndProcessLines(name, callback, checkAbort) { + await getJsonlines(); + + if (jsonlines[name] === undefined) { + return; + } + + let time = new Date().getTime(); + + if (!window.DecompressionStream) { + console.error("DecompressionStream is not supported in this environment."); + return; + } + + // Step 1: Create a DecompressionStream for gzip + const decompressionStream = new window.DecompressionStream('gzip'); + + // Convert the ArrayBuffer to a ReadableStream + const inputStream = new ReadableStream({ + start(controller) { + controller.enqueue(jsonlines[name]); + controller.close(); + }, + }); + + // Pipe the input stream through the decompression stream + const decompressedStream = inputStream.pipeThrough(decompressionStream); + + // Step 2: Convert the stream into text + const textStream = decompressedStream.pipeThrough(new window.TextDecoderStream()); + + // Step 3: Create a reader to read the stream line by line + const reader = textStream.getReader(); + let chunk = ''; + let idx = 0; + let lastYield = new Date().getTime(); + + try { + while (true) { + const now = new Date().getTime(); + + // Periodically allow UI to do what it needs to, including updating any abort flag. + // This does slow down the this function a variable amount (could be <100ms, could be a few seconds) + if (now - lastYield > 300) { + await yieldExec(); + console.log('yielded for ', new Date().getTime() - now, 'ms'); + lastYield = new Date().getTime(); + + if (checkAbort && checkAbort()) { // check abort flag + break; + } + } + + + const { done, value } = await reader.read(); + if (done) { + // If there's any remaining line, process it as well -- should never happen + if (chunk) { + callback(chunk, idx++); + } + break; + } + + // Decode the chunk to a string + chunk += value; + + let start = 0; + while(true) { + let pos = chunk.indexOf('\n', start); + if (pos >= 0) { + if (callback(chunk.slice(start, pos), idx++) === 'abort') { + break; // quit early + } + start = pos + 1; + } else { + chunk = chunk.slice(start); // dump everything that we've processed + break; // no more lines in our chunk + } + } + } + + console.log(`Time to gunzip & segment ${name}: ${new Date().getTime() - time}`); + } finally { + reader.releaseLock(); + } +} + // Updates the whole component library, takes a callback for reporting progress: // the progress is given as list of tuples (task, [statusMessage, finished]) export async function updateComponentLibrary(report) { await persist(); + + // get new db files + const resp = await fetch(`${SOURCE_PATH}/all.jsonlines.tar`); + if (resp.status === 200) { + const data = await resp.arrayBuffer(); + const files = await untar(data); + for (const file of files) { + const basename = file.name.split('.')[0]; + let result = await db.jsonlines.put({name: basename, compressedData: file.buffer}); + console.log(result); + + // store copy in memory (we can load from indexeddb on startup) + jsonlines[basename] = file.buffer; + } + } + + report({"Component index": ["fetching", false]}) let index = await fetchJson(`${SOURCE_PATH}/index.json`, "Cannot fetch categories index: "); diff --git a/web/src/history.js b/web/src/history.js index 29aea60d93c..29a86b4a69e 100644 --- a/web/src/history.js +++ b/web/src/history.js @@ -1,5 +1,5 @@ import React from 'react'; -import { fetchJson, db } from './db' +import { fetchJson, unpackAndProcessLines, unpackLinesAsArray } from './db' import { Spinbox, InlineSpinbox, ZoomableLazyImage, formatAttribute, findCategoryById, getImageUrl, 
restoreLcscUrl } from './componentTable' @@ -19,8 +19,17 @@ class HistoryItem extends React.Component { } componentDidMount() { - db.components.get({lcsc: this.props.lcsc}).then( component => { - this.setState({info: component}); + let schema; + unpackAndProcessLines('components', (component, idx) => { + component = JSON.parse(component); + if (idx === 0) { // first entry is schema + schema = component; + } else { + if (component[schema.lcsc] === this.props.lcsc) { + this.setState({info: component}); + return 'abort'; // done + } + } }); } @@ -152,7 +161,10 @@ class HistoryTable extends React.Component { log.sort((a, b) => b.day - a.day); this.setState({table: log}); }); - db.categories.toArray().then( categories => this.setState({categories}) ); + + unpackLinesAsArray('subcategories').then(cats => { + this.setState({categories: cats.filter((c,i) => i > 0).map(s => JSON.parse(s))}); + }); } render() { From 21df05208cebb3e4d8f8b3153d034d7a6b692102 Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 20:30:23 +1000 Subject: [PATCH 06/13] Generate new db combined data file in github action --- .github/workflows/update_components.yaml | 3 ++ jlcparts/datatables.py | 13 ------- .../generateJsonlinesDatabaseFiles.js | 39 ++++++++++++------- 3 files changed, 28 insertions(+), 27 deletions(-) rename web/processData.js => jlcparts/generateJsonlinesDatabaseFiles.js (65%) diff --git a/.github/workflows/update_components.yaml b/.github/workflows/update_components.yaml index a4ff063bc1e..b9361d9a5a1 100644 --- a/.github/workflows/update_components.yaml +++ b/.github/workflows/update_components.yaml @@ -61,6 +61,9 @@ jobs: rm -f web/build/data/cache.z* zip -s 50m web/build/data/cache.zip cache.sqlite3 + + node generateJsonlinesDatabaseFiles.js + - name: Tar artifact # Artifact are case insensitive, this is workaround run: tar -czf web_build.tar.gz web/build/ - name: Upload artifact diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index ea7931db174..6e52805f30c 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -30,18 +30,6 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): f.write(hash) return hash -def saveAllDataArchive(path): - patterns = ['*.json', '*.json.gz', '*.sha256'] - - with tarfile.open(os.path.join(path, 'all-data-1.tar.gz'), 'w:gz') as tar1: - with tarfile.open(os.path.join(path, 'all-data-2.tar.gz'), 'w:gz') as tar2: - for pattern in patterns: - for file in glob.glob(os.path.join(path, pattern)): - if random.randint(1, 2) == 1: - tar1.add(file, arcname=os.path.relpath(file, start=path)) - else: - tar2.add(file, arcname=os.path.relpath(file, start=path)) - def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): if attr in attrs and attrs[attr] not in ["", "-"]: @@ -398,4 +386,3 @@ def buildtables(library, outdir, ignoreoldstock, jobs): "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() } saveJson(index, os.path.join(outdir, "index.json"), hash=True) - saveAllDataArchive(outdir) \ No newline at end of file diff --git a/web/processData.js b/jlcparts/generateJsonlinesDatabaseFiles.js similarity index 65% rename from web/processData.js rename to jlcparts/generateJsonlinesDatabaseFiles.js index 319c19eb0d0..df94f5f421d 100644 --- a/web/processData.js +++ b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -1,3 +1,13 @@ +/* +This program loads all the category/stock *.json.gz and *.stock.json files and combines them +into three files, whose contents are a single JSON object 
per line: + - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) + - subcategories.jsonlines - each line is a subcategory + - components.jsonlines - each line is a component; references attributes and subcategory by there line number + +These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. +This reprocessing program is a bit slow, and takes of the order of 10 minutes. +*/ const fs = require('fs'); const path = require('path'); @@ -5,9 +15,9 @@ const zlib = require("zlib"); const process = require('process'); const { execSync } = require('child_process'); -const directoryPath = 'public/data'; +const dataPath = 'web/public/data'; -try{process.chdir('web');}catch(ex){} // debug path is 'web/..' +try{process.chdir('web/..');}catch(ex){} // debug path is 'web/..' function foreachJsonFile(directory, processFunc) { try { @@ -41,7 +51,7 @@ function foreachJsonFile(directory, processFunc) { } } -// Call the function +// this contains the output database table contents let database = { subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], @@ -55,7 +65,8 @@ function updateLut(lut, obj) { return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; } -// inverts the lut so that the object becomes an array, with the key being the value (values must be 0-based, numeric, and contiguous) +// Inverts the lut so that the object becomes an array, with the key being the value. +// Values must be 0-based, numeric, and contiguous, or everything will be wrong. function lutToArray(lut) { return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? 
JSON.parse(x[0]) : null); } @@ -69,16 +80,16 @@ function schemaToLookup(arr) { const startTime = new Date().getTime(); // populate the stock lookup -foreachJsonFile(directoryPath, (file, getObj) => { +foreachJsonFile(dataPath, (file, getObj) => { if (file.includes('.stock.json')) { Object.assign(database.stock, getObj()); } }); let processedCount = 0; -const totalCount = fs.readdirSync(directoryPath).filter(file => /\.json\.gz$/.test(file)).length; +const totalCount = fs.readdirSync(dataPath).filter(file => /\.json\.gz$/.test(file)).length; -foreachJsonFile(directoryPath, (file, getObj) => { +foreachJsonFile(dataPath, (file, getObj) => { if (file.includes('.stock.json')) { return; } @@ -92,7 +103,7 @@ foreachJsonFile(directoryPath, (file, getObj) => { try { //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] - const s = schemaToLookup(obj.schema); + const s = schemaToLookup(obj.schema); // input schema obj.components.forEach(comp => { let entry = [ comp[s.lcsc], @@ -118,13 +129,13 @@ foreachJsonFile(directoryPath, (file, getObj) => { console.log('Writing jsonlines files'); function writeOutputFile(name, str) { - fs.writeFileSync(name, str); + //fs.writeFileSync(name, str); fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); } -writeOutputFile('subcategories.jsonlines', database.subcategories.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile('components.jsonlines', database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile('attributes-lut.jsonlines', lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); -execSync('tar -cf all.jsonlines.tar *.jsonlines.gz'); +execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); -console.log(`Processing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); \ No newline at end of file +console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); From d4dacd61f85686fc228076361f9cb69182ffd538 Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 21:32:20 +1000 Subject: [PATCH 07/13] Update first-time and update-available code for new db. Remove old update db code. 
--- web/src/app.js | 6 +- web/src/db.js | 344 ++++++------------------------------------------- 2 files changed, 44 insertions(+), 306 deletions(-) diff --git a/web/src/app.js b/web/src/app.js index 4cbdc0cc411..a22c5b51e26 100644 --- a/web/src/app.js +++ b/web/src/app.js @@ -13,7 +13,7 @@ import { far } from '@fortawesome/free-regular-svg-icons' import { fab } from '@fortawesome/free-brands-svg-icons' import './main.css'; -import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray, haveComponents } from './db' +import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray } from './db' import { ComponentOverview } from './componentTable' import { History } from './history' @@ -81,7 +81,7 @@ class FirstTimeNote extends React.Component { componentDidMount() { unpackLinesAsArray('components').then(components => { - this.setState({componentCount: components.length - 1}); // don't count the schema entry + this.setState({componentCount: Math.max(0, components.length - 1)}); // don't count the schema entry }) } @@ -145,7 +145,7 @@ class UpdateBar extends React.Component { this.setState({updateAvailable}); }); db.settings.get("lastUpdate").then(lastUpdate => { - this.setState({lastUpdate}); + this.setState({lastUpdate: lastUpdate?.value}); }) }; diff --git a/web/src/db.js b/web/src/db.js index 0ac34f8db0f..746dc1e99f8 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -4,11 +4,11 @@ import untar from "js-untar"; if (!window.indexedDB) { alert("This page requires IndexedDB to work.\n" + - "Your browser does not support it. Please upgrade your browser."); + "Your browser does not support it. Please upgrade your browser."); } async function persist() { -return await navigator.storage?.persist?.(); + return await navigator.storage?.persist?.(); } export const db = new Dexie('jlcparts'); @@ -17,11 +17,9 @@ db.version(2).stores({ jsonlines: 'name' }); -function extractCategoryKey(category) { - return category.id; -} const SOURCE_PATH = "data"; +const dbWebPath = `${SOURCE_PATH}/all.jsonlines.tar`; let jsonlines = {}; // copy of the database in memory so we only access the database once (doesn't really matter - it would be pretty fast anyway) async function getJsonlines() { @@ -33,12 +31,7 @@ async function getJsonlines() { return jsonlines; } -export async function haveComponents() { - await getJsonlines(); - return jsonlines['components']?.size; // TODO: check if this should be .length -} - -export async function unpackLinesAsArray(name) { +export async function unpackLinesAsArray(name) { let arr = []; await unpackAndProcessLines(name, (val, idx) => arr.push(val)); return arr; @@ -58,7 +51,7 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { } let time = new Date().getTime(); - + if (!window.DecompressionStream) { console.error("DecompressionStream is not supported in this environment."); return; @@ -89,10 +82,10 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { try { while (true) { - const now = new Date().getTime(); // Periodically allow UI to do what it needs to, including updating any abort flag. 
// This does slow down the this function a variable amount (could be <100ms, could be a few seconds) + const now = new Date().getTime(); if (now - lastYield > 300) { await yieldExec(); console.log('yielded for ', new Date().getTime() - now, 'ms'); @@ -102,7 +95,7 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { break; } } - + const { done, value } = await reader.read(); if (done) { @@ -117,14 +110,14 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { chunk += value; let start = 0; - while(true) { + while (true) { let pos = chunk.indexOf('\n', start); if (pos >= 0) { if (callback(chunk.slice(start, pos), idx++) === 'abort') { break; // quit early } start = pos + 1; - } else { + } else { chunk = chunk.slice(start); // dump everything that we've processed break; // no more lines in our chunk } @@ -142,179 +135,62 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { export async function updateComponentLibrary(report) { await persist(); + let progress = {}; + let updateProgress = (name, status) => { + progress[name] = status; + report(progress); + }; + // get new db files - const resp = await fetch(`${SOURCE_PATH}/all.jsonlines.tar`); + const downloadingTitle = `Downloading ${dbWebPath}`; + updateProgress(downloadingTitle, ["In progress", false]); + const resp = await fetch(dbWebPath); if (resp.status === 200) { const data = await resp.arrayBuffer(); + updateProgress(downloadingTitle, ["OK", false]); + + const untarTitle = `Updating database`; + updateProgress(untarTitle, ["In progress", false]); + const files = await untar(data); for (const file of files) { const basename = file.name.split('.')[0]; - let result = await db.jsonlines.put({name: basename, compressedData: file.buffer}); + let result = await db.jsonlines.put({ name: basename, compressedData: file.buffer }); console.log(result); // store copy in memory (we can load from indexeddb on startup) jsonlines[basename] = file.buffer; } - } + updateProgress(untarTitle, ["OK", true]); - report({"Component index": ["fetching", false]}) - let index = await fetchJson(`${SOURCE_PATH}/index.json`, - "Cannot fetch categories index: "); - let progress = {} - let updateProgress = (name, status) => { - progress[name] = status; - report(progress); + db.settings.put({ + key: "lastUpdate", + value: resp.headers.get('Last-Modified') || new Date().toUTCString() + }); + + } else { + updateProgress(downloadingTitle, ["Download failed", false]); } - db.settings.put({key: "lastDbUpdate", value: index.created}) - await updateCategories(index.categories, - // onNew - async (cName, sName, attr) => { - let name = cName + ": " + sName; - updateProgress(name, ["Adding components 1/2", false]); - let category = await addCategory(cName, sName, attr); - updateProgress(name, ["Updating stock 2/2", false]); - await updateStock(category); - updateProgress(name, ["Added", true]); - return category; - }, - // onUpdateExisting - async (category, attr) => { - let cName = category.category; - let sName = category.subcategory; - let name = cName + ": " + sName; - updateProgress(name, ["Updating components 1/2", false]); - await deleteCategory(category); - let newCategory = await addCategory(cName, sName, attr); - updateProgress(name, ["Updating stock 2/2", false]); - await updateStock(newCategory); - updateProgress(name, ["Update finished", true]); - return newCategory; - }, - // onUpdateStock - async (category, _) => { - let cName = category.category; - let sName = category.subcategory; - let name = 
cName + ": " + sName; - updateProgress(name, ["Updating stock 1/1", false]); - await updateStock(category); - updateProgress(name, ["Stock updated", true]); - return category; - }, - // onExcessive - async category => { - let cName = category.category; - let sName = category.subcategory; - let name = cName + ": " + sName; - updateProgress(name, ["Removing category", false]); - await deleteCategory(category); - updateProgress(name, ["Removed", true]); - } - ); } // Check if the component library can be updated export async function checkForComponentLibraryUpdate() { - let index = await fetchJson(`${SOURCE_PATH}/index.json`, - "Cannot fetch categories index: "); - let updateAvailable = false; - let onUpdate = (category) => { updateAvailable = true; return category; } - await updateCategories(index.categories, - // onNew - onUpdate, - // onUpdateExisting - onUpdate, - // onUpdateStock - onUpdate, - // onExcessive - onUpdate - ); - return updateAvailable; -} + let lastUpdate = (await db.settings.get("lastUpdate"))?.value || new Date(0).toUTCString(); -// contains data from all-data.tar.gz -let allData = { - filesPromise: null, - fetchSingle: async function(url) { // fetchs a single chunk of the combined files - try { - const resp = await fetch(url); - if (resp.status === 200) { - const compressedData = await resp.arrayBuffer(); - const data = pako.ungzip(compressedData); - const files = await untar(data.buffer); - const fileData = {}; - for (const file of files) { - fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; - } - return fileData; - } else { - return {}; // failed to download/unpack - } - } catch (ex) { - console.log('Failed to fetch all-data.tar.gz', ex); - return {}; // failed to download/unpack + let head = await fetch(dbWebPath, { + method: 'HEAD', + headers: { + 'If-Modified-Since': lastUpdate } - }, - fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure - return new Promise(async (resolve, reject) => { - if (this.filesPromise === null) { - this.filesPromise = new Promise(async (resolve, reject) => { - try { - const ChunkCount = 2; - let data = {}; - for (let i = 0; i < ChunkCount; i++) { - Object.assign(data, await this.fetchSingle(`${SOURCE_PATH}/all-data-${i + 1}.tar.gz`)); - } - - if (Object.keys(data).length > 0) { - resolve(data); - } else { - resolve(null); - } - } catch(ex) { - resolve(null); - } - }); - } + }); - const files = await this.filesPromise; - - if (files) { - const fileData = files[path.toLowerCase()]; - if (fileData){ - if (expectJson) { - if (path.slice(-3) === '.gz') { - resolve(JSON.parse(pako.ungzip(fileData, {to: 'string'}))); - } else { - const decoder = new TextDecoder(); - resolve(JSON.parse(decoder.decode(fileData))); - } - } else { - const decoder = new TextDecoder(); - resolve(decoder.decode(fileData)); - } - } else { - //reject(`${path} not found`); - resolve(null); - } - } else { - //reject('All data not available'); - resolve(null); - } - }); - } -}; + let updateAvailable = head.status === 200; // 304 if not modified; any error means we don't know if there's an update + return updateAvailable; +} // Fetch a JSON. 
If error occures, export async function fetchJson(path, errorIntro) { - if (path.indexOf('/index.json') < 0) { - // try from all data combined file first - const data = await allData.fetch(path, true); - if (data) { - return data; - } - } - let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); @@ -341,141 +217,3 @@ export async function fetchJson(path, errorIntro) { throw Error(errorIntro + `Response is not a (compressed) JSON, but ${contentType}: ` + path); } - -async function fetchText(path, errorIntro) { - // try from all data combined file first - const data = await allData.fetch(path, false); - if (data) { - return data; - } - - let response = await fetch(path); - if (!response.ok) { - throw Error(errorIntro + response.statusText); - } - return await response.text(); -} - -// Update categories. Fetched categoryIndex and 3 callback are supplied to -// perform the update. -async function updateCategories(categoryIndex, onNew, onUpdateExisting, onUpdateStock, onExcessive) { - let updates = []; - let usedCategories = new Set(); - for (const [categoryName, subcategories] of Object.entries(categoryIndex)) { - for ( const [subcategoryName, attributes] of Object.entries(subcategories)) { - let action = db.categories - .where({category: categoryName, subcategory: subcategoryName}) - .first(async category => { - if (category === undefined) { - category = await onNew(categoryName, subcategoryName, attributes); - } else if (attributes.datahash !== category.datahash || - attributes.sourcename !== category.sourcename) - { - category = await onUpdateExisting(category, attributes); - } else if (attributes.stockhash !== category.stockhash) { - category = await onUpdateStock(category); - } - - if (category) { - usedCategories.add(extractCategoryKey(category)); - } - }); - updates.push(action); - } - } - await Promise.all(updates); - await db.categories.each(category => { - if (usedCategories.has(extractCategoryKey(category))) { - return; - } - onExcessive(category); - }); -} - -// Takes an array containing schema and an array of values and turns them into -// dictionary -function restoreObject(schema, source) { - return schema.reduce((obj, k, i) => { - obj[k] = source[i]; - return obj; - }, {}); -} - -// Takes a JSON fetched from server and adds them to the database for the -// corresponding category -function addComponents(category, components) { - let schema = components.schema; - let cObjects = components.components.map(src => { - let obj = restoreObject(schema, src); - obj.category = extractCategoryKey(category); - return obj; - }); - return db.components.bulkPut(cObjects); -} - -// Add a single category and fetch all of its components -async function addCategory(categoryName, subcategoryName, attributes) { - let components = await fetchJson(`${SOURCE_PATH}/${attributes.sourcename}.json.gz`, - `Cannot fetch components for category ${categoryName}: ${subcategoryName}: `); - return db.transaction("rw", db.categories, db.components, async () => { - let key = await db.categories.put({ - category: categoryName, - subcategory: subcategoryName, - sourcename: attributes.sourcename, - datahash: attributes.datahash, - stockhash: attributes.stockhash - }); - let category = await db.categories.get(key); - await addComponents(category, components); - return category; - }); -} - -// Fetch and update stock -async function updateStock(category) { - let stock = await fetchJson(`${SOURCE_PATH}/${category.sourcename}.stock.json`, - `Cannot fetch stock for category 
${category.category}: ${category.subcategory}: `); - await db.components.where({category: category.id}).modify(component =>{ - component.stock = stock[component.lcsc]; - }); - // await db.transaction("rw", db.components, async () => { - // let actions = []; - // for (const [component, stockVal] of Object.entries(stock)) { - // actions.push(db.components.update(component, {"stock": stockVal })); - // } - // await Promise.all(actions); - // }); - let hash = await fetchText(`${SOURCE_PATH}/${category.sourcename}.stock.json.sha256`, - `Cannot fetch stock hash for category ${category.category}: ${category.subcategory}: `); - await db.categories.update(extractCategoryKey(category), {stockhash: hash}); -} - -// Delete given category and all of its components -async function deleteCategory(category) { - await db.transaction("rw", db.components, db.categories, async () => { - await db.components.where({category: extractCategoryKey(category)}).delete(); - await db.categories.delete(extractCategoryKey(category)); - }); -} - - -// See https://stackoverflow.com/questions/64114482/aborting-dexie-js-query -// export function cancellableDexieQuery(includedTables, querierFunction) { -// let tx = null; -// let cancelled = false; -// const promise = db.transaction('r', includedTables, () => { -// if (cancelled) -// throw new Dexie.AbortError('Query was cancelled'); -// tx = Dexie.currentTransaction; -// return querierFunction(); -// }); -// return [ -// promise, -// () => { -// cancelled = true; // In case transaction hasn't been started yet. -// if (tx) -// tx.abort(); // If started, abort it. -// tx = null; // Avoid calling abort twice. -// } -// ]; -// } \ No newline at end of file From 5f1ce99d13b6df3978e8ffe5cd660a49d2099c2c Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 23:05:10 +1000 Subject: [PATCH 08/13] Reverting inconsequential changes to datatables.py --- jlcparts/datatables.py | 6 +----- jlcparts/generateJsonlinesDatabaseFiles.js | 2 +- web/src/db.js | 7 ++----- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index 6e52805f30c..f3fe8cc57e2 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,10 +13,6 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes -import tarfile -import glob -import random - def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open with openFn(filename, "wt", encoding="utf-8") as f: @@ -29,7 +25,7 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): hash = sha256file(filename) f.write(hash) return hash - + def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): if attr in attrs and attrs[attr] not in ["", "-"]: diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/jlcparts/generateJsonlinesDatabaseFiles.js index df94f5f421d..b5a522296bc 100644 --- a/jlcparts/generateJsonlinesDatabaseFiles.js +++ b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -3,7 +3,7 @@ This program loads all the category/stock *.json.gz and *.stock.json files and c into three files, whose contents are a single JSON object per line: - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) - subcategories.jsonlines - each line is a subcategory - - components.jsonlines - each line is a component; references attributes and subcategory by there line number + - 
components.jsonlines - each line is a component; references attributes and subcategory by their line number These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. This reprocessing program is a bit slow, and takes of the order of 10 minutes. diff --git a/web/src/db.js b/web/src/db.js index 746dc1e99f8..85cf7e62caf 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -57,7 +57,6 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { return; } - // Step 1: Create a DecompressionStream for gzip const decompressionStream = new window.DecompressionStream('gzip'); // Convert the ArrayBuffer to a ReadableStream @@ -71,11 +70,10 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { // Pipe the input stream through the decompression stream const decompressedStream = inputStream.pipeThrough(decompressionStream); - // Step 2: Convert the stream into text + // Convert the stream into text const textStream = decompressedStream.pipeThrough(new window.TextDecoderStream()); - // Step 3: Create a reader to read the stream line by line - const reader = textStream.getReader(); + const reader = textStream.getReader(); // to read chunks of text from stream let chunk = ''; let idx = 0; let lastYield = new Date().getTime(); @@ -106,7 +104,6 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { break; } - // Decode the chunk to a string chunk += value; let start = 0; From 56f4d38e96446632552364770d2892d192a910f2 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 02:07:10 +1000 Subject: [PATCH 09/13] Fix update download finished status --- web/src/db.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/db.js b/web/src/db.js index 85cf7e62caf..fe4fa164514 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -144,7 +144,7 @@ export async function updateComponentLibrary(report) { const resp = await fetch(dbWebPath); if (resp.status === 200) { const data = await resp.arrayBuffer(); - updateProgress(downloadingTitle, ["OK", false]); + updateProgress(downloadingTitle, ["OK", true]); const untarTitle = `Updating database`; updateProgress(untarTitle, ["In progress", false]); From e3127651a4e6eb08dc306458e13def2e93b19ce8 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 02:35:56 +1000 Subject: [PATCH 10/13] Update readme.md to include the generateJsonlinesDatabaseFiles step --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d3b121a4633..3c2159ab683 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ $ wget https://yaqwsx.github.io/jlcparts/data/cache.zip https://yaqwsx.github.io $ 7z x cache.zip $ mkdir -p web/public/data/ $ jlcparts buildtables --jobs 0 --ignoreoldstock 30 cache.sqlite3 web/public/data +$ node generateJsonlinesDatabaseFiles.js ``` To launch the frontend web server, run: From 26fbcbdbf06f31f229d9d54b18881c8bfcfadf78 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 23:31:55 +1000 Subject: [PATCH 11/13] Fix slow generateJsonlinesDatabaseFiles.js processing (now takes <15seconds) --- jlcparts/generateJsonlinesDatabaseFiles.js | 28 ++++++++++++---------- web/src/componentTable.js | 3 --- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/jlcparts/generateJsonlinesDatabaseFiles.js index b5a522296bc..5c49e40e810 100644 --- a/jlcparts/generateJsonlinesDatabaseFiles.js +++ 
b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -6,7 +6,6 @@ into three files, whose contents are a single JSON object per line: - components.jsonlines - each line is a component; references attributes and subcategory by their line number These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. -This reprocessing program is a bit slow, and takes of the order of 10 minutes. */ const fs = require('fs'); @@ -15,9 +14,8 @@ const zlib = require("zlib"); const process = require('process'); const { execSync } = require('child_process'); -const dataPath = 'web/public/data'; -try{process.chdir('web/..');}catch(ex){} // debug path is 'web/..' +const dataPath = ['web/build/data', 'web/public/data', '../web/public/data'].filter(f => fs.existsSync(f))[0]; function foreachJsonFile(directory, processFunc) { try { @@ -56,19 +54,25 @@ let database = { subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], - attributesLut: [], // this is a list of unique attributes; position is used as the attribute index + attributesLut: new Map(), // this is a list of unique attributes; each new entry gets a new index. Using a Map here instead of an object gives 40x processing speedup stock: {} // this is just a temporary lookup to help generate the components table }; // adds the obj to the lut, and returns the index -function updateLut(lut, obj) { - return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; -} +function updateLut(entryMap, entry) { + const entryKey = JSON.stringify(entry); + if (!entryMap.has(entryKey)) { + const index = entryMap.size; + entryMap.set(entryKey, index); + return index; + } + return entryMap.get(entryKey); + } -// Inverts the lut so that the object becomes an array, with the key being the value. +// Inverts the lut so that the Map becomes an array, with the key being the value. // Values must be 0-based, numeric, and contiguous, or everything will be wrong. -function lutToArray(lut) { - return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? 
JSON.parse(x[0]) : null); +function lutToArray(lutMap) { + return Array.from(lutMap.entries()).sort((a, b) => a[1] - b[1]).map(x => x[0]); } function schemaToLookup(arr) { @@ -134,8 +138,8 @@ function writeOutputFile(name, str) { } writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).join('\n')); execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); -console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); +console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 1000)} seconds`); diff --git a/web/src/componentTable.js b/web/src/componentTable.js index 084f0f78750..e0017ac5003 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -718,9 +718,6 @@ class CategoryFilter extends React.Component { } if (results.length > 0) { - let resultLookup = {}; - results.forEach(res => resultLookup[res.componentIdx] = res); - const attributesLut = await unpackLinesAsArray('attributes-lut'); results.forEach(res => { res.attributes = {}; From 1a7899faaf613c2db2f41706bcbd563a65bc778e Mon Sep 17 00:00:00 2001 From: doug Date: Fri, 23 Feb 2024 23:56:41 +1000 Subject: [PATCH 12/13] Move generateJsonlinesDatabaseFiles.js to correct location --- ...JsonlinesDatabaseFiles.js => generateJsonlinesDatabaseFiles.js | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename jlcparts/generateJsonlinesDatabaseFiles.js => generateJsonlinesDatabaseFiles.js (100%) diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/generateJsonlinesDatabaseFiles.js similarity index 100% rename from jlcparts/generateJsonlinesDatabaseFiles.js rename to generateJsonlinesDatabaseFiles.js From 6e2bf8b4f91e2aeb9e4ae871ab2230a2a5c503b8 Mon Sep 17 00:00:00 2001 From: doug Date: Sun, 25 Feb 2024 00:40:04 +1000 Subject: [PATCH 13/13] Move all datatable processing into the python script. 
Don't create json datatables --- .github/workflows/update_components.yaml | 2 - README.md | 1 - generateJsonlinesDatabaseFiles.js | 145 ----------------------- jlcparts/datatables.py | 138 +++++++++++++-------- web/src/componentTable.js | 4 +- 5 files changed, 93 insertions(+), 197 deletions(-) delete mode 100644 generateJsonlinesDatabaseFiles.js diff --git a/.github/workflows/update_components.yaml b/.github/workflows/update_components.yaml index b9361d9a5a1..aad24abd346 100644 --- a/.github/workflows/update_components.yaml +++ b/.github/workflows/update_components.yaml @@ -62,8 +62,6 @@ jobs: rm -f web/build/data/cache.z* zip -s 50m web/build/data/cache.zip cache.sqlite3 - node generateJsonlinesDatabaseFiles.js - - name: Tar artifact # Artifact are case insensitive, this is workaround run: tar -czf web_build.tar.gz web/build/ - name: Upload artifact diff --git a/README.md b/README.md index 3c2159ab683..d3b121a4633 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,6 @@ $ wget https://yaqwsx.github.io/jlcparts/data/cache.zip https://yaqwsx.github.io $ 7z x cache.zip $ mkdir -p web/public/data/ $ jlcparts buildtables --jobs 0 --ignoreoldstock 30 cache.sqlite3 web/public/data -$ node generateJsonlinesDatabaseFiles.js ``` To launch the frontend web server, run: diff --git a/generateJsonlinesDatabaseFiles.js b/generateJsonlinesDatabaseFiles.js deleted file mode 100644 index 5c49e40e810..00000000000 --- a/generateJsonlinesDatabaseFiles.js +++ /dev/null @@ -1,145 +0,0 @@ -/* -This program loads all the category/stock *.json.gz and *.stock.json files and combines them -into three files, whose contents are a single JSON object per line: - - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) - - subcategories.jsonlines - each line is a subcategory - - components.jsonlines - each line is a component; references attributes and subcategory by their line number - -These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. 
-*/ - -const fs = require('fs'); -const path = require('path'); -const zlib = require("zlib"); -const process = require('process'); -const { execSync } = require('child_process'); - - -const dataPath = ['web/build/data', 'web/public/data', '../web/public/data'].filter(f => fs.existsSync(f))[0]; - -function foreachJsonFile(directory, processFunc) { - try { - // Read the directory - const filenames = fs.readdirSync(directory); - - // Filter .json files - const jsonFiles = filenames.filter(file => /(\.stock\.json$|\.json\.gz$)/.test(file)); - - // Iterate through .json files - for (const file of jsonFiles) { - const filePath = path.join(directory, file); - - // Read and process the JSON file - const getJson = () => { - let data = fs.readFileSync(filePath); - if (/\.gz$/.test(file)) { // decompress if required - data = zlib.gunzipSync(data); - } - - const json = JSON.parse(data); - return json; - }; - - processFunc(file, getJson); - - //break; - } - } catch (error) { - console.error('Error processing JSON files:', error); - } -} - -// this contains the output database table contents -let database = { - subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], - components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], - - attributesLut: new Map(), // this is a list of unique attributes; each new entry gets a new index. Using a Map here instead of an object gives 40x processing speedup - stock: {} // this is just a temporary lookup to help generate the components table -}; - -// adds the obj to the lut, and returns the index -function updateLut(entryMap, entry) { - const entryKey = JSON.stringify(entry); - if (!entryMap.has(entryKey)) { - const index = entryMap.size; - entryMap.set(entryKey, index); - return index; - } - return entryMap.get(entryKey); - } - -// Inverts the lut so that the Map becomes an array, with the key being the value. -// Values must be 0-based, numeric, and contiguous, or everything will be wrong. 
-function lutToArray(lutMap) { - return Array.from(lutMap.entries()).sort((a, b) => a[1] - b[1]).map(x => x[0]); -} - -function schemaToLookup(arr) { - let lut = {}; - arr.forEach((key, i) => lut[key] = i); - return lut; -} - -const startTime = new Date().getTime(); - -// populate the stock lookup -foreachJsonFile(dataPath, (file, getObj) => { - if (file.includes('.stock.json')) { - Object.assign(database.stock, getObj()); - } -}); - -let processedCount = 0; -const totalCount = fs.readdirSync(dataPath).filter(file => /\.json\.gz$/.test(file)).length; - -foreachJsonFile(dataPath, (file, getObj) => { - if (file.includes('.stock.json')) { - return; - } - - const obj = getObj(); - - // subcategories schema: ['subcategory', 'category', 'sourcename'] - database.subcategories.push([obj.subcategory, obj.category, file.split('.')[0]]); - const subcategoryIdx = database.subcategories.length - 1; - - try { - //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] - // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] - const s = schemaToLookup(obj.schema); // input schema - obj.components.forEach(comp => { - let entry = [ - comp[s.lcsc], - comp[s.mfr], - comp[s.description], - Object.entries(comp[s.attributes]).map(attr => updateLut(database.attributesLut, attr)), - database.stock[comp[s.lcsc]], - subcategoryIdx, - comp[s.joints], - comp[s.datasheet], - comp[s.price], - comp[s.img], - comp[s.url] - ]; - database.components.push(entry); - }); - - console.log(`Processed ${++processedCount} / ${totalCount} (${Math.round(processedCount / totalCount * 100)}%)`, file); - } catch (ex) { - console.log(`Failed on ${file}`, ex); - } -}); - -console.log('Writing jsonlines files'); -function writeOutputFile(name, str) { - //fs.writeFileSync(name, str); - fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); -} -writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).join('\n')); - -execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); - -console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 1000)} seconds`); diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index f3fe8cc57e2..f0673ae9469 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,18 +13,23 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes -def saveJson(object, filename, hash=False, pretty=False, compress=False): - openFn = gzip.open if compress else open - with openFn(filename, "wt", encoding="utf-8") as f: - if pretty: - json.dump(object, f, indent=4, sort_keys=True) - else: - json.dump(object, f, separators=(',', ':'), sort_keys=True) - if hash: - with open(filename + ".sha256", "w") as f: - hash = sha256file(filename) - f.write(hash) - return hash +import tarfile + +from time import time + +def saveDatabaseFile(database, outpath, outfilename): + for key, value in database.items(): + filename = os.path.join(outpath, key + ".jsonlines.gz") + with gzip.open(filename, "wt", encoding="utf-8") as f: + for entry in value: + json.dump(entry, f, separators=(',', ':'), sort_keys=False) + f.write("\n") + + with 
tarfile.open(os.path.join(outpath, outfilename), 'w') as tar: + for key, value in database.items(): + filename = os.path.join(outpath, key + ".jsonlines.gz") + tar.add(filename, arcname=os.path.relpath(filename, start=outpath)) + os.unlink(filename) def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): @@ -260,6 +265,8 @@ def extractComponent(component, schema): elif schItem == "url": url = component.get("extra", {}).get("url", None) propertyList.append(trimLcscUrl(url, component["lcsc"])) + elif schItem == "stock": + propertyList.append(component["stock"]) elif schItem in component: item = component[schItem] if isinstance(item, str): @@ -273,15 +280,12 @@ def extractComponent(component, schema): def buildDatatable(components): schema = ["lcsc", "mfr", "joints", "description", - "datasheet", "price", "img", "url", "attributes"] + "datasheet", "price", "img", "url", "attributes", "stock"] return { "schema": schema, "components": [extractComponent(x, schema) for x in components] } -def buildStocktable(components): - return {component["lcsc"]: component["stock"] for component in components } - def clearDir(directory): """ Delete everything inside a directory @@ -293,6 +297,28 @@ def clearDir(directory): elif os.path.isdir(file_path): shutil.rmtree(file_path) +def schemaToLookup(schema): + lut = {} + for idx, key in enumerate(schema): + lut[key] = idx + return lut + +def updateLut(lut, item): + key = json.dumps(item, separators=(',', ':'), sort_keys=True) + if not key in lut: + index = len(lut) + lut[key] = index + return index + return lut[key] + +# Inverts the lut so that the Map becomes an array, with the key being the value. +# Values must be 0-based, numeric, and contiguous, or everything will be wrong. +def lutToArray(lutMap): + arr = [None] * len(lutMap) + for key, value in lutMap.items(): + arr[value] = key + return arr + @dataclasses.dataclass class MapCategoryParams: @@ -316,26 +342,10 @@ def _map_category(val: MapCategoryParams): components = lib.getCategoryComponents(val.catName, val.subcatName, stockNewerThan=val.ignoreoldstock) if not components: return None - - filebase = val.catName + val.subcatName - filebase = filebase.replace("&", "and").replace("/", "aka") - filebase = re.sub('[^A-Za-z0-9]', '_', filebase) - - dataTable = buildDatatable(components) + dataTable = buildDatatable(components) dataTable.update({"category": val.catName, "subcategory": val.subcatName}) - dataHash = saveJson(dataTable, os.path.join(val.outdir, f"{filebase}.json.gz"), - hash=True, compress=True) - stockTable = buildStocktable(components) - stockHash = saveJson(stockTable, os.path.join(val.outdir, f"{filebase}.stock.json"), hash=True) - - return { - "catName": val.catName, - "subcatName": val.subcatName, - "sourcename": filebase, - "datahash": dataHash, - "stockhash": stockHash - } + return dataTable @click.command() @click.argument("library", type=click.Path(dir_okay=False)) @@ -348,6 +358,8 @@ def buildtables(library, outdir, ignoreoldstock, jobs): """ Build datatables out of the LIBRARY and save them in OUTDIR """ + t0 = time() + lib = PartLibraryDb(library) Path(outdir).mkdir(parents=True, exist_ok=True) clearDir(outdir) @@ -367,18 +379,50 @@ def buildtables(library, outdir, ignoreoldstock, jobs): for i, result in enumerate(pool.imap_unordered(_map_category, params)): if result is None: continue - catName, subcatName = result["catName"], result["subcatName"] + catName = result["category"] #.lower() + subcatName = result["subcategory"] #.lower() + sourceName = 
f"{catName}__x__{subcatName}" print(f"{((i) / total * 100):.2f} % {catName}: {subcatName}") - if catName not in categoryIndex: - categoryIndex[catName] = {} - assert subcatName not in categoryIndex[catName] - categoryIndex[catName][subcatName] = { - "sourcename": result["sourcename"], - "datahash": result["datahash"], - "stockhash": result["stockhash"] - } - index = { - "categories": categoryIndex, - "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() + if sourceName not in categoryIndex: + categoryIndex[sourceName] = result + else: + categoryIndex[sourceName]["components"] += result["components"] # combine for categories that are only different because of case + + t1 = time() + # db holds the data we're putting into our database file + db = { + "subcategories": [schemaToLookup(['subcategory', 'category', 'subcategoryIdx'])], + "components": [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], + "attributes-lut": {} } - saveJson(index, os.path.join(outdir, "index.json"), hash=True) + + # fill database + s = None # schema lookup + subcatIndex = 0 + for sourceName, subcatEntry in categoryIndex.items(): + if s is None: + s = schemaToLookup(subcatEntry["schema"]) # all schema will be the same + + subcatIndex += 1 + db["subcategories"] += [[subcatEntry["subcategory"], subcatEntry["category"], subcatIndex]] + + for comp in subcatEntry["components"]: + db["components"] += [[ + comp[s["lcsc"]], + comp[s["mfr"]], + comp[s["description"]], + [updateLut(db["attributes-lut"], [attrName, value]) for attrName,value in comp[s["attributes"]].items()], + comp[s["stock"]], + subcatIndex, + comp[s["joints"]], + comp[s["datasheet"]], + comp[s["price"]], + comp[s["img"]], + comp[s["url"]] + ]] + + # invert the lut + db["attributes-lut"] = [json.loads(str) for str in lutToArray(db["attributes-lut"])] + saveDatabaseFile(db, outdir, "all.jsonlines.tar") + + print(f"Table extraction took {(t1 - t0)}, reformat into one file took {time() - t1}") diff --git a/web/src/componentTable.js b/web/src/componentTable.js index e0017ac5003..bf419d169b2 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -167,10 +167,10 @@ export class ComponentOverview extends React.Component { let schema = subCats[0]; // first entry is always the schema lookup let cats = subCats.filter((sc, i) => i > 0).map((sc, id) => ({ - id: id + 1, + id: sc[schema.subcategoryIdx], category: sc[schema.category], subcategory: sc[schema.subcategory], - sourcename: sc[schema.sourcename], + sourcename: "", // not needed stockhash: 0, // not needed datahash: 0 // not needed }));