From 1e2fc326cbd794618da74705454c69a736b7b68c Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 15:44:40 +1000 Subject: [PATCH 01/13] Allow fetch requests to read from a single downloaded tar.gz file --- web/package-lock.json | 11 +++++++ web/package.json | 1 + web/src/db.js | 75 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/web/package-lock.json b/web/package-lock.json index 27d9b581c1f..287e05a4901 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -19,6 +19,7 @@ "@testing-library/user-event": "^7.2.1", "dexie": "^3.0.2", "immer": "^7.0.8", + "js-untar": "^2.0.0", "pako": "^2.0.4", "react": "^16.13.1", "react-copy-to-clipboard": "^5.0.2", @@ -13865,6 +13866,11 @@ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" }, + "node_modules/js-untar": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/js-untar/-/js-untar-2.0.0.tgz", + "integrity": "sha512-7CsDLrYQMbLxDt2zl9uKaPZSdmJMvGGQ7wo9hoB3J+z/VcO2w63bXFgHVnjF1+S9wD3zAu8FBVj7EYWjTQ3Z7g==" + }, "node_modules/js-yaml": { "version": "3.14.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.0.tgz", @@ -29779,6 +29785,11 @@ "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" }, + "js-untar": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/js-untar/-/js-untar-2.0.0.tgz", + "integrity": "sha512-7CsDLrYQMbLxDt2zl9uKaPZSdmJMvGGQ7wo9hoB3J+z/VcO2w63bXFgHVnjF1+S9wD3zAu8FBVj7EYWjTQ3Z7g==" + }, "js-yaml": { "version": "3.14.0", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.0.tgz", diff --git a/web/package.json b/web/package.json index c9a6c225360..4b95398f6dc 100644 --- a/web/package.json +++ b/web/package.json @@ -14,6 +14,7 @@ "@testing-library/user-event": "^7.2.1", "dexie": "^3.0.2", "immer": "^7.0.8", + "js-untar": "^2.0.0", "pako": "^2.0.4", "react": "^16.13.1", "react-copy-to-clipboard": "^5.0.2", diff --git a/web/src/db.js b/web/src/db.js index 9881bf56e04..b86fff81c3e 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -1,5 +1,6 @@ import Dexie from 'dexie'; import * as pako from 'pako'; +import untar from "js-untar"; if (!window.indexedDB) { alert("This page requires IndexedDB to work.\n" + @@ -101,8 +102,76 @@ export async function checkForComponentLibraryUpdate() { return updateAvailable; } +// contains data from all-data.tar.gz +let allData = { + filesPromise: null, + fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure + return new Promise(async (resolve, reject) => { + if (this.filesPromise === null) { + this.filesPromise = new Promise(async (resolve, reject) => { + try { + const resp = await fetch(`${SOURCE_PATH}/all-data.tar.gz`); + if (resp.status === 200) { + const compressedData = await resp.arrayBuffer(); + const data = pako.ungzip(compressedData); + const files = await untar(data.buffer); + const fileData = {}; + for (const file of files) { + fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; + } + //console.log('Got all data', fileData); + resolve(fileData); + } else { + //reject('Bad fetch of all-data'); + resolve(null); + } + } catch(ex) { + //reject(ex); + resolve(null); + + console.log('Failed to fetch all-data.tar.gz', ex); + } + }); + } + + const files = await 
this.filesPromise; + + if (files) { + const fileData = files[path.toLowerCase()]; + if (fileData){ + if (expectJson) { + if (path.slice(-3) === '.gz') { + resolve(JSON.parse(pako.ungzip(fileData, {to: 'string'}))); + } else { + const decoder = new TextDecoder(); + resolve(JSON.parse(decoder.decode(fileData))); + } + } else { + const decoder = new TextDecoder(); + resolve(decoder.decode(fileData)); + } + } else { + //reject(`${path} not found`); + resolve(null); + } + } else { + //reject('All data not available'); + resolve(null); + } + }); + } +}; + // Fetch a JSON. If error occures, export async function fetchJson(path, errorIntro) { + if (path.indexOf('/index.json') < 0) { + // try from all data combined file first + const data = await allData.fetch(path, true); + if (data) { + return data; + } + } + let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); @@ -131,6 +200,12 @@ export async function fetchJson(path, errorIntro) { } async function fetchText(path, errorIntro) { + // try from all data combined file first + const data = await allData.fetch(path, false); + if (data) { + return data; + } + let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); From 415fae89edae2fe46936c6d998d08e3fdef2736d Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 16:44:07 +1000 Subject: [PATCH 02/13] Add all-data.tar.gz creation to the buildtables function --- jlcparts/datatables.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index f3fe8cc57e2..3fb55210afb 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,6 +13,9 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes +import tarfile +import glob + def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open with openFn(filename, "wt", encoding="utf-8") as f: @@ -25,6 +28,14 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): hash = sha256file(filename) f.write(hash) return hash + +def saveAllDataArchive(path): + patterns = ['*.json', '*.json.gz', '*.sha256'] + + with tarfile.open(os.path.join(path, 'all-data.tar.gz'), 'w:gz') as tar: + for pattern in patterns: + for file in glob.glob(os.path.join(path, pattern)): + tar.add(file, arcname=os.path.relpath(file, start=path)) def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): @@ -382,3 +393,4 @@ def buildtables(library, outdir, ignoreoldstock, jobs): "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() } saveJson(index, os.path.join(outdir, "index.json"), hash=True) + saveAllDataArchive(outdir) \ No newline at end of file From 1fd9ada118ade0b994e638718f61a00970557235 Mon Sep 17 00:00:00 2001 From: doug Date: Sat, 10 Feb 2024 19:40:37 +1000 Subject: [PATCH 03/13] Split combined data file into two parts --- jlcparts/datatables.py | 13 +++++++++---- web/src/db.js | 35 +++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index 3fb55210afb..ea7931db174 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -15,6 +15,7 @@ import tarfile import glob +import random def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open @@ -32,10 +33,14 @@ def saveJson(object, filename, hash=False, 
pretty=False, compress=False):
 def saveAllDataArchive(path):
     patterns = ['*.json', '*.json.gz', '*.sha256']
 
-    with tarfile.open(os.path.join(path, 'all-data.tar.gz'), 'w:gz') as tar:
-        for pattern in patterns:
-            for file in glob.glob(os.path.join(path, pattern)):
-                tar.add(file, arcname=os.path.relpath(file, start=path))
+    with tarfile.open(os.path.join(path, 'all-data-1.tar.gz'), 'w:gz') as tar1:
+        with tarfile.open(os.path.join(path, 'all-data-2.tar.gz'), 'w:gz') as tar2:
+            for pattern in patterns:
+                for file in glob.glob(os.path.join(path, pattern)):
+                    if random.randint(1, 2) == 1:
+                        tar1.add(file, arcname=os.path.relpath(file, start=path))
+                    else:
+                        tar2.add(file, arcname=os.path.relpath(file, start=path))
 
 def weakUpdateParameters(attrs, newParameters):
     for attr, value in newParameters.items():
diff --git a/web/src/db.js b/web/src/db.js
index b86fff81c3e..889ff9671bb 100644
--- a/web/src/db.js
+++ b/web/src/db.js
@@ -105,24 +105,35 @@ export async function checkForComponentLibraryUpdate() {
 // contains data from all-data.tar.gz
 let allData = {
     filesPromise: null,
+    fetchSingle: async function(url) { // fetchs a single chunk of the combined files
+        const resp = await fetch(url);
+        if (resp.status === 200) {
+            const compressedData = await resp.arrayBuffer();
+            const data = pako.ungzip(compressedData);
+            const files = await untar(data.buffer);
+            const fileData = {};
+            for (const file of files) {
+                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
+            }
+            return fileData;
+        } else {
+            return {}; // failed to download/unpack
+        }
+    },
     fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure
         return new Promise(async (resolve, reject) => {
             if (this.filesPromise === null) {
                 this.filesPromise = new Promise(async (resolve, reject) => {
                     try {
-                        const resp = await fetch(`${SOURCE_PATH}/all-data.tar.gz`);
-                        if (resp.status === 200) {
-                            const compressedData = await resp.arrayBuffer();
-                            const data = pako.ungzip(compressedData);
-                            const files = await untar(data.buffer);
-                            const fileData = {};
-                            for (const file of files) {
-                                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
-                            }
-                            //console.log('Got all data', fileData);
-                            resolve(fileData);
+                        const ChunkCount = 2;
+                        let data = {};
+                        for (let i = 0; i < ChunkCount; i++) {
+                            Object.assign(data, await this.fetchSingle(`${SOURCE_PATH}/all-data-${i + 1}.tar.gz`));
+                        }
+
+                        if (Object.keys(data).length > 0) {
+                            resolve(data);
                         } else {
-                            //reject('Bad fetch of all-data');
                             resolve(null);
                         }
                     } catch(ex) {
From 16c4251ad7a5a28f823a8688954ce3e32531f0da Mon Sep 17 00:00:00 2001
From: doug
Date: Sat, 10 Feb 2024 19:52:49 +1000
Subject: [PATCH 04/13] Allow using downloaded chunks of the combined file, even if some chunks are missing

---
 web/src/db.js | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/web/src/db.js b/web/src/db.js
index 889ff9671bb..08c8d1a4d82 100644
--- a/web/src/db.js
+++ b/web/src/db.js
@@ -106,17 +106,22 @@ export async function checkForComponentLibraryUpdate() {
 let allData = {
     filesPromise: null,
     fetchSingle: async function(url) { // fetchs a single chunk of the combined files
-        const resp = await fetch(url);
-        if (resp.status === 200) {
-            const compressedData = await resp.arrayBuffer();
-            const data = pako.ungzip(compressedData);
-            const files = await untar(data.buffer);
-            const fileData = {};
-            for (const file of files) {
-                fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer;
+        try {
+            const resp = 
await fetch(url); + if (resp.status === 200) { + const compressedData = await resp.arrayBuffer(); + const data = pako.ungzip(compressedData); + const files = await untar(data.buffer); + const fileData = {}; + for (const file of files) { + fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; + } + return fileData; + } else { + return {}; // failed to download/unpack } - return fileData; - } else { + } catch (ex) { + console.log('Failed to fetch all-data.tar.gz', ex); return {}; // failed to download/unpack } }, @@ -137,10 +142,7 @@ let allData = { resolve(null); } } catch(ex) { - //reject(ex); resolve(null); - - console.log('Failed to fetch all-data.tar.gz', ex); } }); } From 0b2a27ee012fef59e0b4ba51f66962a702bd8bea Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 18:14:37 +1000 Subject: [PATCH 05/13] Squash database into 3 gz files. Single file download. Use gz files for queries. --- .gitignore | 2 + web/.gitignore | 4 ++ web/processData.js | 130 ++++++++++++++++++++++++++++++++++++ web/src/app.js | 15 +++-- web/src/componentTable.js | 105 +++++++++++++++++++++++------ web/src/db.js | 136 +++++++++++++++++++++++++++++++++++++- web/src/history.js | 20 ++++-- 7 files changed, 379 insertions(+), 33 deletions(-) create mode 100644 web/processData.js diff --git a/.gitignore b/.gitignore index e2ab415f904..de9138b67b2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .idea *.zip *.z* +*.tar +.vscode/ # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/web/.gitignore b/web/.gitignore index c1fe7dfdf2e..661b7cea7b9 100644 --- a/web/.gitignore +++ b/web/.gitignore @@ -5,6 +5,10 @@ /.pnp .pnp.js +*.json +*.gz +*.jsonlines + # testing /coverage diff --git a/web/processData.js b/web/processData.js new file mode 100644 index 00000000000..319c19eb0d0 --- /dev/null +++ b/web/processData.js @@ -0,0 +1,130 @@ + +const fs = require('fs'); +const path = require('path'); +const zlib = require("zlib"); +const process = require('process'); +const { execSync } = require('child_process'); + +const directoryPath = 'public/data'; + +try{process.chdir('web');}catch(ex){} // debug path is 'web/..' 
+ +function foreachJsonFile(directory, processFunc) { + try { + // Read the directory + const filenames = fs.readdirSync(directory); + + // Filter .json files + const jsonFiles = filenames.filter(file => /(\.stock\.json$|\.json\.gz$)/.test(file)); + + // Iterate through .json files + for (const file of jsonFiles) { + const filePath = path.join(directory, file); + + // Read and process the JSON file + const getJson = () => { + let data = fs.readFileSync(filePath); + if (/\.gz$/.test(file)) { // decompress if required + data = zlib.gunzipSync(data); + } + + const json = JSON.parse(data); + return json; + }; + + processFunc(file, getJson); + + //break; + } + } catch (error) { + console.error('Error processing JSON files:', error); + } +} + +// Call the function +let database = { + subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], + components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], + + attributesLut: [], // this is a list of unique attributes; position is used as the attribute index + stock: {} // this is just a temporary lookup to help generate the components table +}; + +// adds the obj to the lut, and returns the index +function updateLut(lut, obj) { + return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; +} + +// inverts the lut so that the object becomes an array, with the key being the value (values must be 0-based, numeric, and contiguous) +function lutToArray(lut) { + return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? JSON.parse(x[0]) : null); +} + +function schemaToLookup(arr) { + let lut = {}; + arr.forEach((key, i) => lut[key] = i); + return lut; +} + +const startTime = new Date().getTime(); + +// populate the stock lookup +foreachJsonFile(directoryPath, (file, getObj) => { + if (file.includes('.stock.json')) { + Object.assign(database.stock, getObj()); + } +}); + +let processedCount = 0; +const totalCount = fs.readdirSync(directoryPath).filter(file => /\.json\.gz$/.test(file)).length; + +foreachJsonFile(directoryPath, (file, getObj) => { + if (file.includes('.stock.json')) { + return; + } + + const obj = getObj(); + + // subcategories schema: ['subcategory', 'category', 'sourcename'] + database.subcategories.push([obj.subcategory, obj.category, file.split('.')[0]]); + const subcategoryIdx = database.subcategories.length - 1; + + try { + //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] + // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] + const s = schemaToLookup(obj.schema); + obj.components.forEach(comp => { + let entry = [ + comp[s.lcsc], + comp[s.mfr], + comp[s.description], + Object.entries(comp[s.attributes]).map(attr => updateLut(database.attributesLut, attr)), + database.stock[comp[s.lcsc]], + subcategoryIdx, + comp[s.joints], + comp[s.datasheet], + comp[s.price], + comp[s.img], + comp[s.url] + ]; + database.components.push(entry); + }); + + console.log(`Processed ${++processedCount} / ${totalCount} (${Math.round(processedCount / totalCount * 100)}%)`, file); + } catch (ex) { + console.log(`Failed on ${file}`, ex); + } +}); + +console.log('Writing jsonlines files'); +function writeOutputFile(name, str) { + fs.writeFileSync(name, str); + fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); +} +writeOutputFile('subcategories.jsonlines', database.subcategories.map(d => 
JSON.stringify(d)).join('\n')); +writeOutputFile('components.jsonlines', database.components.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile('attributes-lut.jsonlines', lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); + +execSync('tar -cf all.jsonlines.tar *.jsonlines.gz'); + +console.log(`Processing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); \ No newline at end of file diff --git a/web/src/app.js b/web/src/app.js index aa97aab5c86..4cbdc0cc411 100644 --- a/web/src/app.js +++ b/web/src/app.js @@ -6,13 +6,14 @@ import { NavLink } from "react-router-dom"; + import { library } from '@fortawesome/fontawesome-svg-core' import { fas } from '@fortawesome/free-solid-svg-icons' import { far } from '@fortawesome/free-regular-svg-icons' import { fab } from '@fortawesome/free-brands-svg-icons' import './main.css'; -import { updateComponentLibrary, checkForComponentLibraryUpdate, db } from './db' +import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray, haveComponents } from './db' import { ComponentOverview } from './componentTable' import { History } from './history' @@ -79,8 +80,8 @@ class FirstTimeNote extends React.Component { } componentDidMount() { - db.components.count().then(x => { - this.setState({componentCount: x}); + unpackLinesAsArray('components').then(components => { + this.setState({componentCount: components.length - 1}); // don't count the schema entry }) } @@ -110,9 +111,11 @@ class NewComponentFormatWarning extends React.Component { } componentDidMount() { - db.components.toCollection().first().then(x => { - if (x !== undefined && typeof x.attributes[Object.keys(x.attributes)[0]] !== 'object') - this.setState({newComponentFormat: false}); + // I don't know if newComponentFormat will work like this + unpackLinesAsArray('subcategories').then(cats => { + if (cats.size > 1) { + this.setState({newComponentFormat: false}); + } }); } diff --git a/web/src/componentTable.js b/web/src/componentTable.js index 5de2d158882..084f0f78750 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -1,4 +1,4 @@ -import { db } from "./db"; +import { unpackAndProcessLines, unpackLinesAsArray} from "./db"; import React from "react"; import { produce, enableMapSet } from "immer"; import { FontAwesomeIcon } from '@fortawesome/react-fontawesome' @@ -161,14 +161,27 @@ export class ComponentOverview extends React.Component { } componentDidMount() { - db.categories.toArray().then( categories => { + (async () => { + // generate categories array + let subCats = (await unpackLinesAsArray('subcategories')).map(str => JSON.parse(str)); + + let schema = subCats[0]; // first entry is always the schema lookup + let cats = subCats.filter((sc, i) => i > 0).map((sc, id) => ({ + id: id + 1, + category: sc[schema.category], + subcategory: sc[schema.subcategory], + sourcename: sc[schema.sourcename], + stockhash: 0, // not needed + datahash: 0 // not needed + })); + this.setState({ - categories: this.prepareCategories(categories), - rawCategories: categories + categories: this.prepareCategories(cats), + rawCategories: cats }); - }) + })(); } - + prepareCategories(sourceCategories) { let categories = {}; for (const category of sourceCategories) { @@ -640,32 +653,84 @@ class CategoryFilter extends React.Component { // full-text search async components() { this.state.abort(); - let query; + + let categoryFilter = (cat) => true; + if (this.state.allCategories) { if (this.state.searchString.length < 3) { // 
prevent high ram usage return []; } - query = db.components; } - else - query = db.components.where("category").anyOf(this.collectActiveCategories()); - + else { + const catIds = this.collectActiveCategories(); + const catIdLookup = new Set(catIds); + categoryFilter = (catid) => catIdLookup.has(catid); + } + + let results = []; + let words = []; if (this.state.searchString.length !== 0) { - const words = this.state.searchString.split(/\s+/) + words = this.state.searchString.split(/\s+/) .filter(x => x.length > 0) .map(x => x.toLocaleLowerCase()); - if (words.length > 0) { - query = query.filter(component => { - const text = componentText(component); - return words.every(word => text.includes(word)); - }); - } } let aborted = false; this.setState({abort: () => aborted = true}); - const components = await query.until(() => aborted).toArray(); - return aborted ? null : components; + + let schema; + await unpackAndProcessLines('components', (comp, idx) => { + comp = JSON.parse(comp); + + if (idx === 0) { // first line is always schema lookup + schema = comp; + } else { + if (categoryFilter(comp[schema.subcategoryIdx])) { + let component = { + lcsc: comp[schema.lcsc], + mfr: comp[schema.mfr], + description: comp[schema.description], + attrsIdx: comp[schema.attrsIdx], + stock: comp[schema.stock], + category: comp[schema.subcategoryIdx], + componentIdx: idx, + joints: comp[schema.joints], + datasheet: comp[schema.datasheet], + price: comp[schema.price], + img: comp[schema.img], + url: comp[schema.url] + }; + + if (words.length > 0) { + const text = componentText(component); + if(words.every(word => text.includes(word))) { + results.push(component); + } + } else { + results.push(component); + } + } + } + }, () => aborted); + + if (aborted) { + return null; + } + + if (results.length > 0) { + let resultLookup = {}; + results.forEach(res => resultLookup[res.componentIdx] = res); + + const attributesLut = await unpackLinesAsArray('attributes-lut'); + results.forEach(res => { + res.attributes = {}; + res.attrsIdx.map(idx => JSON.parse(attributesLut[idx])).forEach(entry => { + res.attributes[entry[0]] = entry[1]; + }); + }); + } + + return results; } handleCategoryChange = (category, value) => { diff --git a/web/src/db.js b/web/src/db.js index 08c8d1a4d82..0ac34f8db0f 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -12,10 +12,9 @@ return await navigator.storage?.persist?.(); } export const db = new Dexie('jlcparts'); -db.version(1).stores({ +db.version(2).stores({ settings: 'key', - components: 'lcsc, category, mfr, *indexWords', - categories: 'id++,[category+subcategory], subcategory, category' + jsonlines: 'name' }); function extractCategoryKey(category) { @@ -24,10 +23,141 @@ function extractCategoryKey(category) { const SOURCE_PATH = "data"; +let jsonlines = {}; // copy of the database in memory so we only access the database once (doesn't really matter - it would be pretty fast anyway) +async function getJsonlines() { + if (Object.keys(jsonlines).length === 0) { + (await db.jsonlines.toArray()).forEach(obj => { + jsonlines[obj.name] = obj.compressedData + }); + } + return jsonlines; +} + +export async function haveComponents() { + await getJsonlines(); + return jsonlines['components']?.size; // TODO: check if this should be .length +} + +export async function unpackLinesAsArray(name) { + let arr = []; + await unpackAndProcessLines(name, (val, idx) => arr.push(val)); + return arr; +} + +async function yieldExec() { + return new Promise((resolve, reject) => { + setTimeout(() => resolve(), 0); + 
}); +} + +export async function unpackAndProcessLines(name, callback, checkAbort) { + await getJsonlines(); + + if (jsonlines[name] === undefined) { + return; + } + + let time = new Date().getTime(); + + if (!window.DecompressionStream) { + console.error("DecompressionStream is not supported in this environment."); + return; + } + + // Step 1: Create a DecompressionStream for gzip + const decompressionStream = new window.DecompressionStream('gzip'); + + // Convert the ArrayBuffer to a ReadableStream + const inputStream = new ReadableStream({ + start(controller) { + controller.enqueue(jsonlines[name]); + controller.close(); + }, + }); + + // Pipe the input stream through the decompression stream + const decompressedStream = inputStream.pipeThrough(decompressionStream); + + // Step 2: Convert the stream into text + const textStream = decompressedStream.pipeThrough(new window.TextDecoderStream()); + + // Step 3: Create a reader to read the stream line by line + const reader = textStream.getReader(); + let chunk = ''; + let idx = 0; + let lastYield = new Date().getTime(); + + try { + while (true) { + const now = new Date().getTime(); + + // Periodically allow UI to do what it needs to, including updating any abort flag. + // This does slow down the this function a variable amount (could be <100ms, could be a few seconds) + if (now - lastYield > 300) { + await yieldExec(); + console.log('yielded for ', new Date().getTime() - now, 'ms'); + lastYield = new Date().getTime(); + + if (checkAbort && checkAbort()) { // check abort flag + break; + } + } + + + const { done, value } = await reader.read(); + if (done) { + // If there's any remaining line, process it as well -- should never happen + if (chunk) { + callback(chunk, idx++); + } + break; + } + + // Decode the chunk to a string + chunk += value; + + let start = 0; + while(true) { + let pos = chunk.indexOf('\n', start); + if (pos >= 0) { + if (callback(chunk.slice(start, pos), idx++) === 'abort') { + break; // quit early + } + start = pos + 1; + } else { + chunk = chunk.slice(start); // dump everything that we've processed + break; // no more lines in our chunk + } + } + } + + console.log(`Time to gunzip & segment ${name}: ${new Date().getTime() - time}`); + } finally { + reader.releaseLock(); + } +} + // Updates the whole component library, takes a callback for reporting progress: // the progress is given as list of tuples (task, [statusMessage, finished]) export async function updateComponentLibrary(report) { await persist(); + + // get new db files + const resp = await fetch(`${SOURCE_PATH}/all.jsonlines.tar`); + if (resp.status === 200) { + const data = await resp.arrayBuffer(); + const files = await untar(data); + for (const file of files) { + const basename = file.name.split('.')[0]; + let result = await db.jsonlines.put({name: basename, compressedData: file.buffer}); + console.log(result); + + // store copy in memory (we can load from indexeddb on startup) + jsonlines[basename] = file.buffer; + } + } + + report({"Component index": ["fetching", false]}) let index = await fetchJson(`${SOURCE_PATH}/index.json`, "Cannot fetch categories index: "); diff --git a/web/src/history.js b/web/src/history.js index 29aea60d93c..29a86b4a69e 100644 --- a/web/src/history.js +++ b/web/src/history.js @@ -1,5 +1,5 @@ import React from 'react'; -import { fetchJson, db } from './db' +import { fetchJson, unpackAndProcessLines, unpackLinesAsArray } from './db' import { Spinbox, InlineSpinbox, ZoomableLazyImage, formatAttribute, findCategoryById, getImageUrl, 
restoreLcscUrl } from './componentTable' @@ -19,8 +19,17 @@ class HistoryItem extends React.Component { } componentDidMount() { - db.components.get({lcsc: this.props.lcsc}).then( component => { - this.setState({info: component}); + let schema; + unpackAndProcessLines('components', (component, idx) => { + component = JSON.parse(component); + if (idx === 0) { // first entry is schema + schema = component; + } else { + if (component[schema.lcsc] === this.props.lcsc) { + this.setState({info: component}); + return 'abort'; // done + } + } }); } @@ -152,7 +161,10 @@ class HistoryTable extends React.Component { log.sort((a, b) => b.day - a.day); this.setState({table: log}); }); - db.categories.toArray().then( categories => this.setState({categories}) ); + + unpackLinesAsArray('subcategories').then(cats => { + this.setState({categories: cats.filter((c,i) => i > 0).map(s => JSON.parse(s))}); + }); } render() { From 21df05208cebb3e4d8f8b3153d034d7a6b692102 Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 20:30:23 +1000 Subject: [PATCH 06/13] Generate new db combined data file in github action --- .github/workflows/update_components.yaml | 3 ++ jlcparts/datatables.py | 13 ------- .../generateJsonlinesDatabaseFiles.js | 39 ++++++++++++------- 3 files changed, 28 insertions(+), 27 deletions(-) rename web/processData.js => jlcparts/generateJsonlinesDatabaseFiles.js (65%) diff --git a/.github/workflows/update_components.yaml b/.github/workflows/update_components.yaml index a4ff063bc1e..b9361d9a5a1 100644 --- a/.github/workflows/update_components.yaml +++ b/.github/workflows/update_components.yaml @@ -61,6 +61,9 @@ jobs: rm -f web/build/data/cache.z* zip -s 50m web/build/data/cache.zip cache.sqlite3 + + node generateJsonlinesDatabaseFiles.js + - name: Tar artifact # Artifact are case insensitive, this is workaround run: tar -czf web_build.tar.gz web/build/ - name: Upload artifact diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index ea7931db174..6e52805f30c 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -30,18 +30,6 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): f.write(hash) return hash -def saveAllDataArchive(path): - patterns = ['*.json', '*.json.gz', '*.sha256'] - - with tarfile.open(os.path.join(path, 'all-data-1.tar.gz'), 'w:gz') as tar1: - with tarfile.open(os.path.join(path, 'all-data-2.tar.gz'), 'w:gz') as tar2: - for pattern in patterns: - for file in glob.glob(os.path.join(path, pattern)): - if random.randint(1, 2) == 1: - tar1.add(file, arcname=os.path.relpath(file, start=path)) - else: - tar2.add(file, arcname=os.path.relpath(file, start=path)) - def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): if attr in attrs and attrs[attr] not in ["", "-"]: @@ -398,4 +386,3 @@ def buildtables(library, outdir, ignoreoldstock, jobs): "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() } saveJson(index, os.path.join(outdir, "index.json"), hash=True) - saveAllDataArchive(outdir) \ No newline at end of file diff --git a/web/processData.js b/jlcparts/generateJsonlinesDatabaseFiles.js similarity index 65% rename from web/processData.js rename to jlcparts/generateJsonlinesDatabaseFiles.js index 319c19eb0d0..df94f5f421d 100644 --- a/web/processData.js +++ b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -1,3 +1,13 @@ +/* +This program loads all the category/stock *.json.gz and *.stock.json files and combines them +into three files, whose contents are a single JSON object 
per line: + - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) + - subcategories.jsonlines - each line is a subcategory + - components.jsonlines - each line is a component; references attributes and subcategory by there line number + +These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. +This reprocessing program is a bit slow, and takes of the order of 10 minutes. +*/ const fs = require('fs'); const path = require('path'); @@ -5,9 +15,9 @@ const zlib = require("zlib"); const process = require('process'); const { execSync } = require('child_process'); -const directoryPath = 'public/data'; +const dataPath = 'web/public/data'; -try{process.chdir('web');}catch(ex){} // debug path is 'web/..' +try{process.chdir('web/..');}catch(ex){} // debug path is 'web/..' function foreachJsonFile(directory, processFunc) { try { @@ -41,7 +51,7 @@ function foreachJsonFile(directory, processFunc) { } } -// Call the function +// this contains the output database table contents let database = { subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], @@ -55,7 +65,8 @@ function updateLut(lut, obj) { return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; } -// inverts the lut so that the object becomes an array, with the key being the value (values must be 0-based, numeric, and contiguous) +// Inverts the lut so that the object becomes an array, with the key being the value. +// Values must be 0-based, numeric, and contiguous, or everything will be wrong. function lutToArray(lut) { return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? 
JSON.parse(x[0]) : null); } @@ -69,16 +80,16 @@ function schemaToLookup(arr) { const startTime = new Date().getTime(); // populate the stock lookup -foreachJsonFile(directoryPath, (file, getObj) => { +foreachJsonFile(dataPath, (file, getObj) => { if (file.includes('.stock.json')) { Object.assign(database.stock, getObj()); } }); let processedCount = 0; -const totalCount = fs.readdirSync(directoryPath).filter(file => /\.json\.gz$/.test(file)).length; +const totalCount = fs.readdirSync(dataPath).filter(file => /\.json\.gz$/.test(file)).length; -foreachJsonFile(directoryPath, (file, getObj) => { +foreachJsonFile(dataPath, (file, getObj) => { if (file.includes('.stock.json')) { return; } @@ -92,7 +103,7 @@ foreachJsonFile(directoryPath, (file, getObj) => { try { //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] - const s = schemaToLookup(obj.schema); + const s = schemaToLookup(obj.schema); // input schema obj.components.forEach(comp => { let entry = [ comp[s.lcsc], @@ -118,13 +129,13 @@ foreachJsonFile(directoryPath, (file, getObj) => { console.log('Writing jsonlines files'); function writeOutputFile(name, str) { - fs.writeFileSync(name, str); + //fs.writeFileSync(name, str); fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); } -writeOutputFile('subcategories.jsonlines', database.subcategories.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile('components.jsonlines', database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile('attributes-lut.jsonlines', lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); -execSync('tar -cf all.jsonlines.tar *.jsonlines.gz'); +execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); -console.log(`Processing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); \ No newline at end of file +console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); From d4dacd61f85686fc228076361f9cb69182ffd538 Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 21:32:20 +1000 Subject: [PATCH 07/13] Update first-time and update-available code for new db. Remove old update db code. 
--- web/src/app.js | 6 +- web/src/db.js | 344 ++++++------------------------------------------- 2 files changed, 44 insertions(+), 306 deletions(-) diff --git a/web/src/app.js b/web/src/app.js index 4cbdc0cc411..a22c5b51e26 100644 --- a/web/src/app.js +++ b/web/src/app.js @@ -13,7 +13,7 @@ import { far } from '@fortawesome/free-regular-svg-icons' import { fab } from '@fortawesome/free-brands-svg-icons' import './main.css'; -import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray, haveComponents } from './db' +import { updateComponentLibrary, checkForComponentLibraryUpdate, db, unpackLinesAsArray } from './db' import { ComponentOverview } from './componentTable' import { History } from './history' @@ -81,7 +81,7 @@ class FirstTimeNote extends React.Component { componentDidMount() { unpackLinesAsArray('components').then(components => { - this.setState({componentCount: components.length - 1}); // don't count the schema entry + this.setState({componentCount: Math.max(0, components.length - 1)}); // don't count the schema entry }) } @@ -145,7 +145,7 @@ class UpdateBar extends React.Component { this.setState({updateAvailable}); }); db.settings.get("lastUpdate").then(lastUpdate => { - this.setState({lastUpdate}); + this.setState({lastUpdate: lastUpdate?.value}); }) }; diff --git a/web/src/db.js b/web/src/db.js index 0ac34f8db0f..746dc1e99f8 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -4,11 +4,11 @@ import untar from "js-untar"; if (!window.indexedDB) { alert("This page requires IndexedDB to work.\n" + - "Your browser does not support it. Please upgrade your browser."); + "Your browser does not support it. Please upgrade your browser."); } async function persist() { -return await navigator.storage?.persist?.(); + return await navigator.storage?.persist?.(); } export const db = new Dexie('jlcparts'); @@ -17,11 +17,9 @@ db.version(2).stores({ jsonlines: 'name' }); -function extractCategoryKey(category) { - return category.id; -} const SOURCE_PATH = "data"; +const dbWebPath = `${SOURCE_PATH}/all.jsonlines.tar`; let jsonlines = {}; // copy of the database in memory so we only access the database once (doesn't really matter - it would be pretty fast anyway) async function getJsonlines() { @@ -33,12 +31,7 @@ async function getJsonlines() { return jsonlines; } -export async function haveComponents() { - await getJsonlines(); - return jsonlines['components']?.size; // TODO: check if this should be .length -} - -export async function unpackLinesAsArray(name) { +export async function unpackLinesAsArray(name) { let arr = []; await unpackAndProcessLines(name, (val, idx) => arr.push(val)); return arr; @@ -58,7 +51,7 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { } let time = new Date().getTime(); - + if (!window.DecompressionStream) { console.error("DecompressionStream is not supported in this environment."); return; @@ -89,10 +82,10 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { try { while (true) { - const now = new Date().getTime(); // Periodically allow UI to do what it needs to, including updating any abort flag. 
// This does slow down the this function a variable amount (could be <100ms, could be a few seconds) + const now = new Date().getTime(); if (now - lastYield > 300) { await yieldExec(); console.log('yielded for ', new Date().getTime() - now, 'ms'); @@ -102,7 +95,7 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { break; } } - + const { done, value } = await reader.read(); if (done) { @@ -117,14 +110,14 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { chunk += value; let start = 0; - while(true) { + while (true) { let pos = chunk.indexOf('\n', start); if (pos >= 0) { if (callback(chunk.slice(start, pos), idx++) === 'abort') { break; // quit early } start = pos + 1; - } else { + } else { chunk = chunk.slice(start); // dump everything that we've processed break; // no more lines in our chunk } @@ -142,179 +135,62 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { export async function updateComponentLibrary(report) { await persist(); + let progress = {}; + let updateProgress = (name, status) => { + progress[name] = status; + report(progress); + }; + // get new db files - const resp = await fetch(`${SOURCE_PATH}/all.jsonlines.tar`); + const downloadingTitle = `Downloading ${dbWebPath}`; + updateProgress(downloadingTitle, ["In progress", false]); + const resp = await fetch(dbWebPath); if (resp.status === 200) { const data = await resp.arrayBuffer(); + updateProgress(downloadingTitle, ["OK", false]); + + const untarTitle = `Updating database`; + updateProgress(untarTitle, ["In progress", false]); + const files = await untar(data); for (const file of files) { const basename = file.name.split('.')[0]; - let result = await db.jsonlines.put({name: basename, compressedData: file.buffer}); + let result = await db.jsonlines.put({ name: basename, compressedData: file.buffer }); console.log(result); // store copy in memory (we can load from indexeddb on startup) jsonlines[basename] = file.buffer; } - } + updateProgress(untarTitle, ["OK", true]); - report({"Component index": ["fetching", false]}) - let index = await fetchJson(`${SOURCE_PATH}/index.json`, - "Cannot fetch categories index: "); - let progress = {} - let updateProgress = (name, status) => { - progress[name] = status; - report(progress); + db.settings.put({ + key: "lastUpdate", + value: resp.headers.get('Last-Modified') || new Date().toUTCString() + }); + + } else { + updateProgress(downloadingTitle, ["Download failed", false]); } - db.settings.put({key: "lastDbUpdate", value: index.created}) - await updateCategories(index.categories, - // onNew - async (cName, sName, attr) => { - let name = cName + ": " + sName; - updateProgress(name, ["Adding components 1/2", false]); - let category = await addCategory(cName, sName, attr); - updateProgress(name, ["Updating stock 2/2", false]); - await updateStock(category); - updateProgress(name, ["Added", true]); - return category; - }, - // onUpdateExisting - async (category, attr) => { - let cName = category.category; - let sName = category.subcategory; - let name = cName + ": " + sName; - updateProgress(name, ["Updating components 1/2", false]); - await deleteCategory(category); - let newCategory = await addCategory(cName, sName, attr); - updateProgress(name, ["Updating stock 2/2", false]); - await updateStock(newCategory); - updateProgress(name, ["Update finished", true]); - return newCategory; - }, - // onUpdateStock - async (category, _) => { - let cName = category.category; - let sName = category.subcategory; - let name = 
cName + ": " + sName; - updateProgress(name, ["Updating stock 1/1", false]); - await updateStock(category); - updateProgress(name, ["Stock updated", true]); - return category; - }, - // onExcessive - async category => { - let cName = category.category; - let sName = category.subcategory; - let name = cName + ": " + sName; - updateProgress(name, ["Removing category", false]); - await deleteCategory(category); - updateProgress(name, ["Removed", true]); - } - ); } // Check if the component library can be updated export async function checkForComponentLibraryUpdate() { - let index = await fetchJson(`${SOURCE_PATH}/index.json`, - "Cannot fetch categories index: "); - let updateAvailable = false; - let onUpdate = (category) => { updateAvailable = true; return category; } - await updateCategories(index.categories, - // onNew - onUpdate, - // onUpdateExisting - onUpdate, - // onUpdateStock - onUpdate, - // onExcessive - onUpdate - ); - return updateAvailable; -} + let lastUpdate = (await db.settings.get("lastUpdate"))?.value || new Date(0).toUTCString(); -// contains data from all-data.tar.gz -let allData = { - filesPromise: null, - fetchSingle: async function(url) { // fetchs a single chunk of the combined files - try { - const resp = await fetch(url); - if (resp.status === 200) { - const compressedData = await resp.arrayBuffer(); - const data = pako.ungzip(compressedData); - const files = await untar(data.buffer); - const fileData = {}; - for (const file of files) { - fileData[`${SOURCE_PATH}/${file.name}`.toLowerCase()] = file.buffer; - } - return fileData; - } else { - return {}; // failed to download/unpack - } - } catch (ex) { - console.log('Failed to fetch all-data.tar.gz', ex); - return {}; // failed to download/unpack + let head = await fetch(dbWebPath, { + method: 'HEAD', + headers: { + 'If-Modified-Since': lastUpdate } - }, - fetch: async function(path, expectJson) { // returns promise; resolves as data on success, and null on failure - return new Promise(async (resolve, reject) => { - if (this.filesPromise === null) { - this.filesPromise = new Promise(async (resolve, reject) => { - try { - const ChunkCount = 2; - let data = {}; - for (let i = 0; i < ChunkCount; i++) { - Object.assign(data, await this.fetchSingle(`${SOURCE_PATH}/all-data-${i + 1}.tar.gz`)); - } - - if (Object.keys(data).length > 0) { - resolve(data); - } else { - resolve(null); - } - } catch(ex) { - resolve(null); - } - }); - } + }); - const files = await this.filesPromise; - - if (files) { - const fileData = files[path.toLowerCase()]; - if (fileData){ - if (expectJson) { - if (path.slice(-3) === '.gz') { - resolve(JSON.parse(pako.ungzip(fileData, {to: 'string'}))); - } else { - const decoder = new TextDecoder(); - resolve(JSON.parse(decoder.decode(fileData))); - } - } else { - const decoder = new TextDecoder(); - resolve(decoder.decode(fileData)); - } - } else { - //reject(`${path} not found`); - resolve(null); - } - } else { - //reject('All data not available'); - resolve(null); - } - }); - } -}; + let updateAvailable = head.status === 200; // 304 if not modified; any error means we don't know if there's an update + return updateAvailable; +} // Fetch a JSON. 
If error occures, export async function fetchJson(path, errorIntro) { - if (path.indexOf('/index.json') < 0) { - // try from all data combined file first - const data = await allData.fetch(path, true); - if (data) { - return data; - } - } - let response = await fetch(path); if (!response.ok) { throw Error(errorIntro + response.statusText); @@ -341,141 +217,3 @@ export async function fetchJson(path, errorIntro) { throw Error(errorIntro + `Response is not a (compressed) JSON, but ${contentType}: ` + path); } - -async function fetchText(path, errorIntro) { - // try from all data combined file first - const data = await allData.fetch(path, false); - if (data) { - return data; - } - - let response = await fetch(path); - if (!response.ok) { - throw Error(errorIntro + response.statusText); - } - return await response.text(); -} - -// Update categories. Fetched categoryIndex and 3 callback are supplied to -// perform the update. -async function updateCategories(categoryIndex, onNew, onUpdateExisting, onUpdateStock, onExcessive) { - let updates = []; - let usedCategories = new Set(); - for (const [categoryName, subcategories] of Object.entries(categoryIndex)) { - for ( const [subcategoryName, attributes] of Object.entries(subcategories)) { - let action = db.categories - .where({category: categoryName, subcategory: subcategoryName}) - .first(async category => { - if (category === undefined) { - category = await onNew(categoryName, subcategoryName, attributes); - } else if (attributes.datahash !== category.datahash || - attributes.sourcename !== category.sourcename) - { - category = await onUpdateExisting(category, attributes); - } else if (attributes.stockhash !== category.stockhash) { - category = await onUpdateStock(category); - } - - if (category) { - usedCategories.add(extractCategoryKey(category)); - } - }); - updates.push(action); - } - } - await Promise.all(updates); - await db.categories.each(category => { - if (usedCategories.has(extractCategoryKey(category))) { - return; - } - onExcessive(category); - }); -} - -// Takes an array containing schema and an array of values and turns them into -// dictionary -function restoreObject(schema, source) { - return schema.reduce((obj, k, i) => { - obj[k] = source[i]; - return obj; - }, {}); -} - -// Takes a JSON fetched from server and adds them to the database for the -// corresponding category -function addComponents(category, components) { - let schema = components.schema; - let cObjects = components.components.map(src => { - let obj = restoreObject(schema, src); - obj.category = extractCategoryKey(category); - return obj; - }); - return db.components.bulkPut(cObjects); -} - -// Add a single category and fetch all of its components -async function addCategory(categoryName, subcategoryName, attributes) { - let components = await fetchJson(`${SOURCE_PATH}/${attributes.sourcename}.json.gz`, - `Cannot fetch components for category ${categoryName}: ${subcategoryName}: `); - return db.transaction("rw", db.categories, db.components, async () => { - let key = await db.categories.put({ - category: categoryName, - subcategory: subcategoryName, - sourcename: attributes.sourcename, - datahash: attributes.datahash, - stockhash: attributes.stockhash - }); - let category = await db.categories.get(key); - await addComponents(category, components); - return category; - }); -} - -// Fetch and update stock -async function updateStock(category) { - let stock = await fetchJson(`${SOURCE_PATH}/${category.sourcename}.stock.json`, - `Cannot fetch stock for category 
${category.category}: ${category.subcategory}: `); - await db.components.where({category: category.id}).modify(component =>{ - component.stock = stock[component.lcsc]; - }); - // await db.transaction("rw", db.components, async () => { - // let actions = []; - // for (const [component, stockVal] of Object.entries(stock)) { - // actions.push(db.components.update(component, {"stock": stockVal })); - // } - // await Promise.all(actions); - // }); - let hash = await fetchText(`${SOURCE_PATH}/${category.sourcename}.stock.json.sha256`, - `Cannot fetch stock hash for category ${category.category}: ${category.subcategory}: `); - await db.categories.update(extractCategoryKey(category), {stockhash: hash}); -} - -// Delete given category and all of its components -async function deleteCategory(category) { - await db.transaction("rw", db.components, db.categories, async () => { - await db.components.where({category: extractCategoryKey(category)}).delete(); - await db.categories.delete(extractCategoryKey(category)); - }); -} - - -// See https://stackoverflow.com/questions/64114482/aborting-dexie-js-query -// export function cancellableDexieQuery(includedTables, querierFunction) { -// let tx = null; -// let cancelled = false; -// const promise = db.transaction('r', includedTables, () => { -// if (cancelled) -// throw new Dexie.AbortError('Query was cancelled'); -// tx = Dexie.currentTransaction; -// return querierFunction(); -// }); -// return [ -// promise, -// () => { -// cancelled = true; // In case transaction hasn't been started yet. -// if (tx) -// tx.abort(); // If started, abort it. -// tx = null; // Avoid calling abort twice. -// } -// ]; -// } \ No newline at end of file From 5f1ce99d13b6df3978e8ffe5cd660a49d2099c2c Mon Sep 17 00:00:00 2001 From: doug Date: Mon, 19 Feb 2024 23:05:10 +1000 Subject: [PATCH 08/13] Reverting inconsequential changes to datatables.py --- jlcparts/datatables.py | 6 +----- jlcparts/generateJsonlinesDatabaseFiles.js | 2 +- web/src/db.js | 7 ++----- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index 6e52805f30c..f3fe8cc57e2 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,10 +13,6 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes -import tarfile -import glob -import random - def saveJson(object, filename, hash=False, pretty=False, compress=False): openFn = gzip.open if compress else open with openFn(filename, "wt", encoding="utf-8") as f: @@ -29,7 +25,7 @@ def saveJson(object, filename, hash=False, pretty=False, compress=False): hash = sha256file(filename) f.write(hash) return hash - + def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): if attr in attrs and attrs[attr] not in ["", "-"]: diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/jlcparts/generateJsonlinesDatabaseFiles.js index df94f5f421d..b5a522296bc 100644 --- a/jlcparts/generateJsonlinesDatabaseFiles.js +++ b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -3,7 +3,7 @@ This program loads all the category/stock *.json.gz and *.stock.json files and c into three files, whose contents are a single JSON object per line: - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) - subcategories.jsonlines - each line is a subcategory - - components.jsonlines - each line is a component; references attributes and subcategory by there line number + - 
components.jsonlines - each line is a component; references attributes and subcategory by their line number These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. This reprocessing program is a bit slow, and takes of the order of 10 minutes. diff --git a/web/src/db.js b/web/src/db.js index 746dc1e99f8..85cf7e62caf 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -57,7 +57,6 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { return; } - // Step 1: Create a DecompressionStream for gzip const decompressionStream = new window.DecompressionStream('gzip'); // Convert the ArrayBuffer to a ReadableStream @@ -71,11 +70,10 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { // Pipe the input stream through the decompression stream const decompressedStream = inputStream.pipeThrough(decompressionStream); - // Step 2: Convert the stream into text + // Convert the stream into text const textStream = decompressedStream.pipeThrough(new window.TextDecoderStream()); - // Step 3: Create a reader to read the stream line by line - const reader = textStream.getReader(); + const reader = textStream.getReader(); // to read chunks of text from stream let chunk = ''; let idx = 0; let lastYield = new Date().getTime(); @@ -106,7 +104,6 @@ export async function unpackAndProcessLines(name, callback, checkAbort) { break; } - // Decode the chunk to a string chunk += value; let start = 0; From 56f4d38e96446632552364770d2892d192a910f2 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 02:07:10 +1000 Subject: [PATCH 09/13] Fix update download finished status --- web/src/db.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/db.js b/web/src/db.js index 85cf7e62caf..fe4fa164514 100644 --- a/web/src/db.js +++ b/web/src/db.js @@ -144,7 +144,7 @@ export async function updateComponentLibrary(report) { const resp = await fetch(dbWebPath); if (resp.status === 200) { const data = await resp.arrayBuffer(); - updateProgress(downloadingTitle, ["OK", false]); + updateProgress(downloadingTitle, ["OK", true]); const untarTitle = `Updating database`; updateProgress(untarTitle, ["In progress", false]); From e3127651a4e6eb08dc306458e13def2e93b19ce8 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 02:35:56 +1000 Subject: [PATCH 10/13] Update readme.md to include the generateJsonlinesDatabaseFiles step --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d3b121a4633..3c2159ab683 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ $ wget https://yaqwsx.github.io/jlcparts/data/cache.zip https://yaqwsx.github.io $ 7z x cache.zip $ mkdir -p web/public/data/ $ jlcparts buildtables --jobs 0 --ignoreoldstock 30 cache.sqlite3 web/public/data +$ node generateJsonlinesDatabaseFiles.js ``` To launch the frontend web server, run: From 26fbcbdbf06f31f229d9d54b18881c8bfcfadf78 Mon Sep 17 00:00:00 2001 From: doug Date: Tue, 20 Feb 2024 23:31:55 +1000 Subject: [PATCH 11/13] Fix slow generateJsonlinesDatabaseFiles.js processing (now takes <15seconds) --- jlcparts/generateJsonlinesDatabaseFiles.js | 28 ++++++++++++---------- web/src/componentTable.js | 3 --- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/jlcparts/generateJsonlinesDatabaseFiles.js index b5a522296bc..5c49e40e810 100644 --- a/jlcparts/generateJsonlinesDatabaseFiles.js +++ 
b/jlcparts/generateJsonlinesDatabaseFiles.js @@ -6,7 +6,6 @@ into three files, whose contents are a single JSON object per line: - components.jsonlines - each line is a component; references attributes and subcategory by their line number These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. -This reprocessing program is a bit slow, and takes of the order of 10 minutes. */ const fs = require('fs'); @@ -15,9 +14,8 @@ const zlib = require("zlib"); const process = require('process'); const { execSync } = require('child_process'); -const dataPath = 'web/public/data'; -try{process.chdir('web/..');}catch(ex){} // debug path is 'web/..' +const dataPath = ['web/build/data', 'web/public/data', '../web/public/data'].filter(f => fs.existsSync(f))[0]; function foreachJsonFile(directory, processFunc) { try { @@ -56,19 +54,25 @@ let database = { subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], - attributesLut: [], // this is a list of unique attributes; position is used as the attribute index + attributesLut: new Map(), // this is a list of unique attributes; each new entry gets a new index. Using a Map here instead of an object gives 40x processing speedup stock: {} // this is just a temporary lookup to help generate the components table }; // adds the obj to the lut, and returns the index -function updateLut(lut, obj) { - return lut[JSON.stringify(obj)] ??= Object.keys(lut).length; -} +function updateLut(entryMap, entry) { + const entryKey = JSON.stringify(entry); + if (!entryMap.has(entryKey)) { + const index = entryMap.size; + entryMap.set(entryKey, index); + return index; + } + return entryMap.get(entryKey); + } -// Inverts the lut so that the object becomes an array, with the key being the value. +// Inverts the lut so that the Map becomes an array, with the key being the value. // Values must be 0-based, numeric, and contiguous, or everything will be wrong. -function lutToArray(lut) { - return Object.entries(lut).sort((a, b) => a[1] - b[1]).map(x => x[0] ? 
JSON.parse(x[0]) : null); +function lutToArray(lutMap) { + return Array.from(lutMap.entries()).sort((a, b) => a[1] - b[1]).map(x => x[0]); } function schemaToLookup(arr) { @@ -134,8 +138,8 @@ function writeOutputFile(name, str) { } writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).map(d => JSON.stringify(d)).join('\n')); +writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).join('\n')); execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); -console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 6000) / 10} minutes`); +console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 1000)} seconds`); diff --git a/web/src/componentTable.js b/web/src/componentTable.js index 084f0f78750..e0017ac5003 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -718,9 +718,6 @@ class CategoryFilter extends React.Component { } if (results.length > 0) { - let resultLookup = {}; - results.forEach(res => resultLookup[res.componentIdx] = res); - const attributesLut = await unpackLinesAsArray('attributes-lut'); results.forEach(res => { res.attributes = {}; From 1a7899faaf613c2db2f41706bcbd563a65bc778e Mon Sep 17 00:00:00 2001 From: doug Date: Fri, 23 Feb 2024 23:56:41 +1000 Subject: [PATCH 12/13] Move generateJsonlinesDatabaseFiles.js to correct location --- ...JsonlinesDatabaseFiles.js => generateJsonlinesDatabaseFiles.js | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename jlcparts/generateJsonlinesDatabaseFiles.js => generateJsonlinesDatabaseFiles.js (100%) diff --git a/jlcparts/generateJsonlinesDatabaseFiles.js b/generateJsonlinesDatabaseFiles.js similarity index 100% rename from jlcparts/generateJsonlinesDatabaseFiles.js rename to generateJsonlinesDatabaseFiles.js From 6e2bf8b4f91e2aeb9e4ae871ab2230a2a5c503b8 Mon Sep 17 00:00:00 2001 From: doug Date: Sun, 25 Feb 2024 00:40:04 +1000 Subject: [PATCH 13/13] Move all datatable processing into the python script. 
Don't create json datatables --- .github/workflows/update_components.yaml | 2 - README.md | 1 - generateJsonlinesDatabaseFiles.js | 145 ----------------------- jlcparts/datatables.py | 138 +++++++++++++-------- web/src/componentTable.js | 4 +- 5 files changed, 93 insertions(+), 197 deletions(-) delete mode 100644 generateJsonlinesDatabaseFiles.js diff --git a/.github/workflows/update_components.yaml b/.github/workflows/update_components.yaml index b9361d9a5a1..aad24abd346 100644 --- a/.github/workflows/update_components.yaml +++ b/.github/workflows/update_components.yaml @@ -62,8 +62,6 @@ jobs: rm -f web/build/data/cache.z* zip -s 50m web/build/data/cache.zip cache.sqlite3 - node generateJsonlinesDatabaseFiles.js - - name: Tar artifact # Artifact are case insensitive, this is workaround run: tar -czf web_build.tar.gz web/build/ - name: Upload artifact diff --git a/README.md b/README.md index 3c2159ab683..d3b121a4633 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,6 @@ $ wget https://yaqwsx.github.io/jlcparts/data/cache.zip https://yaqwsx.github.io $ 7z x cache.zip $ mkdir -p web/public/data/ $ jlcparts buildtables --jobs 0 --ignoreoldstock 30 cache.sqlite3 web/public/data -$ node generateJsonlinesDatabaseFiles.js ``` To launch the frontend web server, run: diff --git a/generateJsonlinesDatabaseFiles.js b/generateJsonlinesDatabaseFiles.js deleted file mode 100644 index 5c49e40e810..00000000000 --- a/generateJsonlinesDatabaseFiles.js +++ /dev/null @@ -1,145 +0,0 @@ -/* -This program loads all the category/stock *.json.gz and *.stock.json files and combines them -into three files, whose contents are a single JSON object per line: - - attributes-lut.jsonlines - each line is an attribute, and components will contain a list of attribute indices (the index is the line number) - - subcategories.jsonlines - each line is a subcategory - - components.jsonlines - each line is a component; references attributes and subcategory by their line number - -These files are then packaged into a .tar file, allowing a single file to be downloaded to update the entire database with new components and stock levels. 
-*/ - -const fs = require('fs'); -const path = require('path'); -const zlib = require("zlib"); -const process = require('process'); -const { execSync } = require('child_process'); - - -const dataPath = ['web/build/data', 'web/public/data', '../web/public/data'].filter(f => fs.existsSync(f))[0]; - -function foreachJsonFile(directory, processFunc) { - try { - // Read the directory - const filenames = fs.readdirSync(directory); - - // Filter .json files - const jsonFiles = filenames.filter(file => /(\.stock\.json$|\.json\.gz$)/.test(file)); - - // Iterate through .json files - for (const file of jsonFiles) { - const filePath = path.join(directory, file); - - // Read and process the JSON file - const getJson = () => { - let data = fs.readFileSync(filePath); - if (/\.gz$/.test(file)) { // decompress if required - data = zlib.gunzipSync(data); - } - - const json = JSON.parse(data); - return json; - }; - - processFunc(file, getJson); - - //break; - } - } catch (error) { - console.error('Error processing JSON files:', error); - } -} - -// this contains the output database table contents -let database = { - subcategories: [schemaToLookup(['subcategory', 'category', 'sourcename'])], - components: [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], - - attributesLut: new Map(), // this is a list of unique attributes; each new entry gets a new index. Using a Map here instead of an object gives 40x processing speedup - stock: {} // this is just a temporary lookup to help generate the components table -}; - -// adds the obj to the lut, and returns the index -function updateLut(entryMap, entry) { - const entryKey = JSON.stringify(entry); - if (!entryMap.has(entryKey)) { - const index = entryMap.size; - entryMap.set(entryKey, index); - return index; - } - return entryMap.get(entryKey); - } - -// Inverts the lut so that the Map becomes an array, with the key being the value. -// Values must be 0-based, numeric, and contiguous, or everything will be wrong. 
-function lutToArray(lutMap) { - return Array.from(lutMap.entries()).sort((a, b) => a[1] - b[1]).map(x => x[0]); -} - -function schemaToLookup(arr) { - let lut = {}; - arr.forEach((key, i) => lut[key] = i); - return lut; -} - -const startTime = new Date().getTime(); - -// populate the stock lookup -foreachJsonFile(dataPath, (file, getObj) => { - if (file.includes('.stock.json')) { - Object.assign(database.stock, getObj()); - } -}); - -let processedCount = 0; -const totalCount = fs.readdirSync(dataPath).filter(file => /\.json\.gz$/.test(file)).length; - -foreachJsonFile(dataPath, (file, getObj) => { - if (file.includes('.stock.json')) { - return; - } - - const obj = getObj(); - - // subcategories schema: ['subcategory', 'category', 'sourcename'] - database.subcategories.push([obj.subcategory, obj.category, file.split('.')[0]]); - const subcategoryIdx = database.subcategories.length - 1; - - try { - //input schema = ["lcsc", "mfr", "joints", "description","datasheet", "price", "img", "url", "attributes"] - // components schema ['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'] - const s = schemaToLookup(obj.schema); // input schema - obj.components.forEach(comp => { - let entry = [ - comp[s.lcsc], - comp[s.mfr], - comp[s.description], - Object.entries(comp[s.attributes]).map(attr => updateLut(database.attributesLut, attr)), - database.stock[comp[s.lcsc]], - subcategoryIdx, - comp[s.joints], - comp[s.datasheet], - comp[s.price], - comp[s.img], - comp[s.url] - ]; - database.components.push(entry); - }); - - console.log(`Processed ${++processedCount} / ${totalCount} (${Math.round(processedCount / totalCount * 100)}%)`, file); - } catch (ex) { - console.log(`Failed on ${file}`, ex); - } -}); - -console.log('Writing jsonlines files'); -function writeOutputFile(name, str) { - //fs.writeFileSync(name, str); - fs.writeFileSync(name + '.gz', Buffer.from(zlib.gzipSync(str))); -} -writeOutputFile(`${dataPath}/subcategories.jsonlines`, database.subcategories.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/components.jsonlines`, database.components.map(d => JSON.stringify(d)).join('\n')); -writeOutputFile(`${dataPath}/attributes-lut.jsonlines`, lutToArray(database.attributesLut).join('\n')); - -execSync(`(cd ${dataPath} && tar -cf all.jsonlines.tar *.jsonlines.gz)`); - -console.log(`Reprocessing took ${Math.round((new Date().getTime() - startTime) / 1000)} seconds`); diff --git a/jlcparts/datatables.py b/jlcparts/datatables.py index f3fe8cc57e2..f0673ae9469 100644 --- a/jlcparts/datatables.py +++ b/jlcparts/datatables.py @@ -13,18 +13,23 @@ from jlcparts.common import sha256file from jlcparts import attributes, descriptionAttributes -def saveJson(object, filename, hash=False, pretty=False, compress=False): - openFn = gzip.open if compress else open - with openFn(filename, "wt", encoding="utf-8") as f: - if pretty: - json.dump(object, f, indent=4, sort_keys=True) - else: - json.dump(object, f, separators=(',', ':'), sort_keys=True) - if hash: - with open(filename + ".sha256", "w") as f: - hash = sha256file(filename) - f.write(hash) - return hash +import tarfile + +from time import time + +def saveDatabaseFile(database, outpath, outfilename): + for key, value in database.items(): + filename = os.path.join(outpath, key + ".jsonlines.gz") + with gzip.open(filename, "wt", encoding="utf-8") as f: + for entry in value: + json.dump(entry, f, separators=(',', ':'), sort_keys=False) + f.write("\n") + + with 
tarfile.open(os.path.join(outpath, outfilename), 'w') as tar: + for key, value in database.items(): + filename = os.path.join(outpath, key + ".jsonlines.gz") + tar.add(filename, arcname=os.path.relpath(filename, start=outpath)) + os.unlink(filename) def weakUpdateParameters(attrs, newParameters): for attr, value in newParameters.items(): @@ -260,6 +265,8 @@ def extractComponent(component, schema): elif schItem == "url": url = component.get("extra", {}).get("url", None) propertyList.append(trimLcscUrl(url, component["lcsc"])) + elif schItem == "stock": + propertyList.append(component["stock"]) elif schItem in component: item = component[schItem] if isinstance(item, str): @@ -273,15 +280,12 @@ def extractComponent(component, schema): def buildDatatable(components): schema = ["lcsc", "mfr", "joints", "description", - "datasheet", "price", "img", "url", "attributes"] + "datasheet", "price", "img", "url", "attributes", "stock"] return { "schema": schema, "components": [extractComponent(x, schema) for x in components] } -def buildStocktable(components): - return {component["lcsc"]: component["stock"] for component in components } - def clearDir(directory): """ Delete everything inside a directory @@ -293,6 +297,28 @@ def clearDir(directory): elif os.path.isdir(file_path): shutil.rmtree(file_path) +def schemaToLookup(schema): + lut = {} + for idx, key in enumerate(schema): + lut[key] = idx + return lut + +def updateLut(lut, item): + key = json.dumps(item, separators=(',', ':'), sort_keys=True) + if not key in lut: + index = len(lut) + lut[key] = index + return index + return lut[key] + +# Inverts the lut so that the Map becomes an array, with the key being the value. +# Values must be 0-based, numeric, and contiguous, or everything will be wrong. +def lutToArray(lutMap): + arr = [None] * len(lutMap) + for key, value in lutMap.items(): + arr[value] = key + return arr + @dataclasses.dataclass class MapCategoryParams: @@ -316,26 +342,10 @@ def _map_category(val: MapCategoryParams): components = lib.getCategoryComponents(val.catName, val.subcatName, stockNewerThan=val.ignoreoldstock) if not components: return None - - filebase = val.catName + val.subcatName - filebase = filebase.replace("&", "and").replace("/", "aka") - filebase = re.sub('[^A-Za-z0-9]', '_', filebase) - - dataTable = buildDatatable(components) + dataTable = buildDatatable(components) dataTable.update({"category": val.catName, "subcategory": val.subcatName}) - dataHash = saveJson(dataTable, os.path.join(val.outdir, f"{filebase}.json.gz"), - hash=True, compress=True) - stockTable = buildStocktable(components) - stockHash = saveJson(stockTable, os.path.join(val.outdir, f"{filebase}.stock.json"), hash=True) - - return { - "catName": val.catName, - "subcatName": val.subcatName, - "sourcename": filebase, - "datahash": dataHash, - "stockhash": stockHash - } + return dataTable @click.command() @click.argument("library", type=click.Path(dir_okay=False)) @@ -348,6 +358,8 @@ def buildtables(library, outdir, ignoreoldstock, jobs): """ Build datatables out of the LIBRARY and save them in OUTDIR """ + t0 = time() + lib = PartLibraryDb(library) Path(outdir).mkdir(parents=True, exist_ok=True) clearDir(outdir) @@ -367,18 +379,50 @@ def buildtables(library, outdir, ignoreoldstock, jobs): for i, result in enumerate(pool.imap_unordered(_map_category, params)): if result is None: continue - catName, subcatName = result["catName"], result["subcatName"] + catName = result["category"] #.lower() + subcatName = result["subcategory"] #.lower() + sourceName = 
f"{catName}__x__{subcatName}" print(f"{((i) / total * 100):.2f} % {catName}: {subcatName}") - if catName not in categoryIndex: - categoryIndex[catName] = {} - assert subcatName not in categoryIndex[catName] - categoryIndex[catName][subcatName] = { - "sourcename": result["sourcename"], - "datahash": result["datahash"], - "stockhash": result["stockhash"] - } - index = { - "categories": categoryIndex, - "created": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat() + if sourceName not in categoryIndex: + categoryIndex[sourceName] = result + else: + categoryIndex[sourceName]["components"] += result["components"] # combine for categories that are only different because of case + + t1 = time() + # db holds the data we're putting into our database file + db = { + "subcategories": [schemaToLookup(['subcategory', 'category', 'subcategoryIdx'])], + "components": [schemaToLookup(['lcsc', 'mfr', 'description', 'attrsIdx', 'stock', 'subcategoryIdx', 'joints', 'datasheet', 'price', 'img', 'url'])], + "attributes-lut": {} } - saveJson(index, os.path.join(outdir, "index.json"), hash=True) + + # fill database + s = None # schema lookup + subcatIndex = 0 + for sourceName, subcatEntry in categoryIndex.items(): + if s is None: + s = schemaToLookup(subcatEntry["schema"]) # all schema will be the same + + subcatIndex += 1 + db["subcategories"] += [[subcatEntry["subcategory"], subcatEntry["category"], subcatIndex]] + + for comp in subcatEntry["components"]: + db["components"] += [[ + comp[s["lcsc"]], + comp[s["mfr"]], + comp[s["description"]], + [updateLut(db["attributes-lut"], [attrName, value]) for attrName,value in comp[s["attributes"]].items()], + comp[s["stock"]], + subcatIndex, + comp[s["joints"]], + comp[s["datasheet"]], + comp[s["price"]], + comp[s["img"]], + comp[s["url"]] + ]] + + # invert the lut + db["attributes-lut"] = [json.loads(str) for str in lutToArray(db["attributes-lut"])] + saveDatabaseFile(db, outdir, "all.jsonlines.tar") + + print(f"Table extraction took {(t1 - t0)}, reformat into one file took {time() - t1}") diff --git a/web/src/componentTable.js b/web/src/componentTable.js index e0017ac5003..bf419d169b2 100644 --- a/web/src/componentTable.js +++ b/web/src/componentTable.js @@ -167,10 +167,10 @@ export class ComponentOverview extends React.Component { let schema = subCats[0]; // first entry is always the schema lookup let cats = subCats.filter((sc, i) => i > 0).map((sc, id) => ({ - id: id + 1, + id: sc[schema.subcategoryIdx], category: sc[schema.category], subcategory: sc[schema.subcategory], - sourcename: sc[schema.sourcename], + sourcename: "", // not needed stockhash: 0, // not needed datahash: 0 // not needed }));