From 178226a7fe2e4c07b5caf657876db54bfb20ac9a Mon Sep 17 00:00:00 2001
From: Franciszek Stachura <fbstachura@gmail.com>
Date: Sun, 29 Dec 2024 21:22:22 +0100
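Subject: [PATCH] update: Remove old update script

Delete the old top-level update.py and make the scripts under utils/
run the elixir.update module (python3 -m elixir.update) instead. The
$ELIXIR_THREADS argument is no longer passed to the updater.
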
---
 update.py                    | 624 -----------------------------------
 utils/index-all-repositories |   5 +-
 utils/index-repository       |   2 +-
 utils/update-elixir-data     |   2 +-
 4 files changed, 4 insertions(+), 629 deletions(-)
 delete mode 100755 update.py

diff --git a/update.py b/update.py
deleted file mode 100755
index 79cb4dcf..00000000
--- a/update.py
+++ /dev/null
@@ -1,624 +0,0 @@
-#!/usr/bin/env python3
-
-#  This file is part of Elixir, a source code cross-referencer.
-#
-#  Copyright (C) 2017--2020 Mikaël Bouillot <mikael.bouillot@bootlin.com>
-#                           Maxime Chretien <maxime.chretien@bootlin.com>
-#                           and contributors
-#
-#  Elixir is free software: you can redistribute it and/or modify
-#  it under the terms of the GNU Affero General Public License as published by
-#  the Free Software Foundation, either version 3 of the License, or
-#  (at your option) any later version.
-#
-#  Elixir is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU Affero General Public License for more details.
-#
-#  You should have received a copy of the GNU Affero General Public License
-#  along with Elixir.  If not, see <http://www.gnu.org/licenses/>.
-
-# Throughout, an "idx" is the sequential number associated with a blob.
-# This is different from that blob's Git hash.
-
-from sys import argv
-from threading import Thread, Lock, Event, Condition
-
-import elixir.lib as lib
-from elixir.lib import script, scriptLines
-import elixir.data as data
-from elixir.data import PathList
-from find_compatible_dts import FindCompatibleDTS
-
-verbose = False
-
-dts_comp_support = int(script('dts-comp'))
-
-compatibles_parser = FindCompatibleDTS()
-
-db = data.DB(lib.getDataDir(), readonly=False, shared=True, dtscomp=dts_comp_support)
-
-# Number of cpu threads (+2 for version indexing)
-cpu = 10
-threads_list = []
-
-hash_file_lock = Lock() # Lock for db.hash and db.file
-blobs_lock = Lock() # Lock for db.blobs
-defs_lock = Lock() # Lock for db.defs
-refs_lock = Lock() # Lock for db.refs
-docs_lock = Lock() # Lock for db.docs
-comps_lock = Lock() # Lock for db.comps
-comps_docs_lock = Lock() # Lock for db.comps_docs
-tag_ready = Condition() # Waiting for new tags
-
-new_idxes = [] # (new idxes, Event idxes ready, Event defs ready, Event comps ready, Event vers ready)
-bindings_idxes = [] # DT bindings documentation files
-idx_key_mod = 1000000
-defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key.
-
-tags_done = False # True if all tags have been added to new_idxes
-
-# Progress variables [tags, finished threads]
-tags_defs = [0, 0]
-tags_defs_lock = Lock()
-tags_refs = [0, 0]
-tags_refs_lock = Lock()
-tags_docs = [0, 0]
-tags_docs_lock = Lock()
-tags_comps = [0, 0]
-tags_comps_lock = Lock()
-tags_comps_docs = [0, 0]
-tags_comps_docs_lock = Lock()
-
-class UpdateIds(Thread):
-    def __init__(self, tag_buf):
-        Thread.__init__(self, name="UpdateIdsElixir")
-        self.tag_buf = tag_buf
-
-    def run(self):
-        global new_idxes, tags_done, tag_ready
-        self.index = 0
-
-        for tag in self.tag_buf:
-
-            new_idxes.append((self.update_blob_ids(tag), Event(), Event(), Event(), Event()))
-
-            progress('ids: ' +  tag.decode() + ': ' + str(len(new_idxes[self.index][0])) +
-                        ' new blobs', self.index+1)
-
-            new_idxes[self.index][1].set() # Tell that the tag is ready
-
-            self.index += 1
-
-            # Wake up waiting threads
-            with tag_ready:
-                tag_ready.notify_all()
-
-        tags_done = True
-        progress('ids: Thread finished', self.index)
-
-    def update_blob_ids(self, tag):
-
-        global hash_file_lock, blobs_lock
-
-        if db.vars.exists('numBlobs'):
-            idx = db.vars.get('numBlobs')
-        else:
-            idx = 0
-
-        # Get blob hashes and associated file names (without path)
-        blobs = scriptLines('list-blobs', '-f', tag)
-
-        new_idxes = []
-        for blob in blobs:
-            hash, filename = blob.split(b' ',maxsplit=1)
-            with blobs_lock:
-                blob_exist = db.blob.exists(hash)
-                if not blob_exist:
-                    db.blob.put(hash, idx)
-
-            if not blob_exist:
-                with hash_file_lock:
-                    db.hash.put(idx, hash)
-                    db.file.put(idx, filename)
-
-                new_idxes.append(idx)
-                if verbose:
-                    print(f"New blob #{idx} {hash}:{filename}")
-                idx += 1
-        db.vars.put('numBlobs', idx)
-        return new_idxes
-
-
-class UpdateVersions(Thread):
-    def __init__(self, tag_buf):
-        Thread.__init__(self, name="UpdateVersionsElixir")
-        self.tag_buf = tag_buf
-
-    def run(self):
-        global new_idxes, tag_ready
-
-        index = 0
-
-        while index < len(self.tag_buf):
-            if index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            tag = self.tag_buf[index]
-
-            new_idxes[index][1].wait() # Make sure the tag is ready
-
-            self.update_versions(tag)
-
-            new_idxes[index][4].set() # Tell that UpdateVersions processed the tag
-
-            progress('vers: ' + tag.decode() + ' done', index+1)
-
-            index += 1
-
-        progress('vers: Thread finished', index)
-
-    def update_versions(self, tag):
-        global blobs_lock
-
-        # Get blob hashes and associated file paths
-        blobs = scriptLines('list-blobs', '-p', tag)
-        buf = []
-
-        for blob in blobs:
-            hash, path = blob.split(b' ', maxsplit=1)
-            with blobs_lock:
-                idx = db.blob.get(hash)
-            buf.append((idx, path))
-
-        buf = sorted(buf)
-        obj = PathList()
-        for idx, path in buf:
-            obj.append(idx, path)
-
-            # Store DT bindings documentation files to parse them later
-            if path[:33] == b'Documentation/devicetree/bindings':
-                bindings_idxes.append(idx)
-
-            if verbose:
-                print(f"Tag {tag}: adding #{idx} {path}")
-        db.vers.put(tag, obj, sync=True)
-
-
-class UpdateDefs(Thread):
-    def __init__(self, start, inc):
-        Thread.__init__(self, name="UpdateDefsElixir")
-        self.index = start
-        self.inc = inc # Equivalent to the number of defs threads
-
-    def run(self):
-        global new_idxes, tags_done, tag_ready, tags_defs, tags_defs_lock
-
-        while not (tags_done and self.index >= len(new_idxes)):
-            if self.index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            new_idxes[self.index][1].wait() # Make sure the tag is ready
-
-            with tags_defs_lock:
-                tags_defs[0] += 1
-
-            self.update_definitions(new_idxes[self.index][0])
-
-            new_idxes[self.index][2].set() # Tell that UpdateDefs processed the tag
-
-            self.index += self.inc
-
-        with tags_defs_lock:
-            tags_defs[1] += 1
-            progress('defs: Thread ' + str(tags_defs[1]) + '/' + str(self.inc) + ' finished', tags_defs[0])
-
-
-    def update_definitions(self, idxes):
-        global hash_file_lock, defs_lock, tags_defs
-
-        for idx in idxes:
-            if idx % 1000 == 0: progress('defs: ' + str(idx), tags_defs[0])
-
-            with hash_file_lock:
-                hash = db.hash.get(idx)
-                filename = db.file.get(idx)
-
-            family = lib.getFileFamily(filename)
-            if family in [None, 'M']: continue
-
-            lines = scriptLines('parse-defs', hash, filename, family)
-
-            with defs_lock:
-                for l in lines:
-                    ident, type, line = l.split(b' ')
-                    type = type.decode()
-                    line = int(line.decode())
-
-                    defs_idxes[idx*idx_key_mod + line] = ident
-
-                    if db.defs.exists(ident):
-                        obj = db.defs.get(ident)
-                    elif lib.isIdent(ident):
-                        obj = data.DefList()
-                    else:
-                        continue
-
-                    obj.append(idx, type, line, family)
-                    if verbose:
-                        print(f"def {type} {ident} in #{idx} @ {line}")
-                    db.defs.put(ident, obj)
-
-
-class UpdateRefs(Thread):
-    def __init__(self, start, inc):
-        Thread.__init__(self, name="UpdateRefsElixir")
-        self.index = start
-        self.inc = inc # Equivalent to the number of refs threads
-
-    def run(self):
-        global new_idxes, tags_done, tags_refs, tags_refs_lock
-
-        while not (tags_done and self.index >= len(new_idxes)):
-            if self.index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            new_idxes[self.index][1].wait() # Make sure the tag is ready
-            new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag
-
-            with tags_refs_lock:
-                tags_refs[0] += 1
-
-            self.update_references(new_idxes[self.index][0])
-
-            self.index += self.inc
-
-        with tags_refs_lock:
-            tags_refs[1] += 1
-            progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0])
-
-    def update_references(self, idxes):
-        global hash_file_lock, defs_lock, refs_lock, tags_refs
-
-        for idx in idxes:
-            if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0])
-
-            with hash_file_lock:
-                hash = db.hash.get(idx)
-                filename = db.file.get(idx)
-
-            family = lib.getFileFamily(filename)
-            if family == None: continue
-
-            prefix = b''
-            # Kconfig values are saved as CONFIG_<value>
-            if family == 'K':
-                prefix = b'CONFIG_'
-
-            tokens = scriptLines('tokenize-file', '-b', hash, family)
-            even = True
-            line_num = 1
-            idents = {}
-            with defs_lock:
-                for tok in tokens:
-                    even = not even
-                    if even:
-                        tok = prefix + tok
-
-                        if (db.defs.exists(tok) and
-                            not ( (idx*idx_key_mod + line_num) in defs_idxes and
-                                defs_idxes[idx*idx_key_mod + line_num] == tok ) and
-                            (family != 'M' or tok.startswith(b'CONFIG_'))):
-                            # We only index CONFIG_??? in makefiles
-                            if tok in idents:
-                                idents[tok] += ',' + str(line_num)
-                            else:
-                                idents[tok] = str(line_num)
-
-                    else:
-                        line_num += tok.count(b'\1')
-
-            with refs_lock:
-                for ident, lines in idents.items():
-                    if db.refs.exists(ident):
-                        obj = db.refs.get(ident)
-                    else:
-                        obj = data.RefList()
-
-                    obj.append(idx, lines, family)
-                    if verbose:
-                        print(f"ref: {ident} in #{idx} @ {lines}")
-                    db.refs.put(ident, obj)
-
-
-class UpdateDocs(Thread):
-    def __init__(self, start, inc):
-        Thread.__init__(self, name="UpdateDocsElixir")
-        self.index = start
-        self.inc = inc # Equivalent to the number of docs threads
-
-    def run(self):
-        global new_idxes, tags_done, tags_docs, tags_docs_lock
-
-        while not (tags_done and self.index >= len(new_idxes)):
-            if self.index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            new_idxes[self.index][1].wait() # Make sure the tag is ready
-
-            with tags_docs_lock:
-                tags_docs[0] += 1
-
-            self.update_doc_comments(new_idxes[self.index][0])
-
-            self.index += self.inc
-
-        with tags_docs_lock:
-            tags_docs[1] += 1
-            progress('docs: Thread ' + str(tags_docs[1]) + '/' + str(self.inc) + ' finished', tags_docs[0])
-
-    def update_doc_comments(self, idxes):
-        global hash_file_lock, docs_lock, tags_docs
-
-        for idx in idxes:
-            if idx % 1000 == 0: progress('docs: ' + str(idx), tags_docs[0])
-
-            with hash_file_lock:
-                hash = db.hash.get(idx)
-                filename = db.file.get(idx)
-
-            family = lib.getFileFamily(filename)
-            if family in [None, 'M']: continue
-
-            lines = scriptLines('parse-docs', hash, filename)
-            with docs_lock:
-                for l in lines:
-                    ident, line = l.split(b' ')
-                    line = int(line.decode())
-
-                    if db.docs.exists(ident):
-                        obj = db.docs.get(ident)
-                    else:
-                        obj = data.RefList()
-
-                    obj.append(idx, str(line), family)
-                    if verbose:
-                        print(f"doc: {ident} in #{idx} @ {line}")
-                    db.docs.put(ident, obj)
-
-
-class UpdateComps(Thread):
-    def __init__(self, start, inc):
-        Thread.__init__(self, name="UpdateCompsElixir")
-        self.index = start
-        self.inc = inc # Equivalent to the number of comps threads
-
-    def run(self):
-        global new_idxes, tags_done, tags_comps, tags_comps_lock
-
-        while not (tags_done and self.index >= len(new_idxes)):
-            if self.index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            new_idxes[self.index][1].wait() # Make sure the tag is ready
-
-            with tags_comps_lock:
-                tags_comps[0] += 1
-
-            self.update_compatibles(new_idxes[self.index][0])
-
-            new_idxes[self.index][3].set() # Tell that UpdateComps processed the tag
-
-            self.index += self.inc
-
-        with tags_comps_lock:
-            tags_comps[1] += 1
-            progress('comps: Thread ' + str(tags_comps[1]) + '/' + str(self.inc) + ' finished', tags_comps[0])
-
-    def update_compatibles(self, idxes):
-        global hash_file_lock, comps_lock, tags_comps
-
-        for idx in idxes:
-            if idx % 1000 == 0: progress('comps: ' + str(idx), tags_comps[0])
-
-            with hash_file_lock:
-                hash = db.hash.get(idx)
-                filename = db.file.get(idx)
-
-            family = lib.getFileFamily(filename)
-            if family in [None, 'K', 'M']: continue
-
-            lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
-            comps = {}
-            for l in lines:
-                ident, line = l.split(' ')
-
-                if ident in comps:
-                    comps[ident] += ',' + str(line)
-                else:
-                    comps[ident] = str(line)
-
-            with comps_lock:
-                for ident, lines in comps.items():
-                    if db.comps.exists(ident):
-                        obj = db.comps.get(ident)
-                    else:
-                        obj = data.RefList()
-
-                    obj.append(idx, lines, family)
-                    if verbose:
-                        print(f"comps: {ident} in #{idx} @ {line}")
-                    db.comps.put(ident, obj)
-
-
-class UpdateCompsDocs(Thread):
-    def __init__(self, start, inc):
-        Thread.__init__(self, name="UpdateCompsDocsElixir")
-        self.index = start
-        self.inc = inc # Equivalent to the number of comps_docs threads
-
-    def run(self):
-        global new_idxes, tags_done, tags_comps_docs, tags_comps_docs_lock
-
-        while not (tags_done and self.index >= len(new_idxes)):
-            if self.index >= len(new_idxes):
-                # Wait for new tags
-                with tag_ready:
-                    tag_ready.wait()
-                continue
-
-            new_idxes[self.index][1].wait() # Make sure the tag is ready
-            new_idxes[self.index][3].wait() # Make sure UpdateComps processed the tag
-            new_idxes[self.index][4].wait() # Make sure UpdateVersions processed the tag
-
-            with tags_comps_docs_lock:
-                tags_comps_docs[0] += 1
-
-            self.update_compatibles_bindings(new_idxes[self.index][0])
-
-            self.index += self.inc
-
-        with tags_comps_docs_lock:
-            tags_comps_docs[1] += 1
-            progress('comps_docs: Thread ' + str(tags_comps_docs[1]) + '/' + str(self.inc) + ' finished', tags_comps_docs[0])
-
-    def update_compatibles_bindings(self, idxes):
-        global hash_file_lock, comps_lock, comps_docs_lock, tags_comps_docs, bindings_idxes
-
-        for idx in idxes:
-            if idx % 1000 == 0: progress('comps_docs: ' + str(idx), tags_comps_docs[0])
-
-            if not idx in bindings_idxes: # Parse only bindings doc files
-                continue
-
-            with hash_file_lock:
-                hash = db.hash.get(idx)
-
-            family = 'B'
-            lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
-            comps_docs = {}
-            with comps_lock:
-                for l in lines:
-                    ident, line = l.split(' ')
-
-                    if db.comps.exists(ident):
-                        if ident in comps_docs:
-                            comps_docs[ident] += ',' + str(line)
-                        else:
-                            comps_docs[ident] = str(line)
-
-            with comps_docs_lock:
-                for ident, lines in comps_docs.items():
-                    if db.comps_docs.exists(ident):
-                        obj = db.comps_docs.get(ident)
-                    else:
-                        obj = data.RefList()
-
-                    obj.append(idx, lines, family)
-                    if verbose:
-                        print(f"comps_docs: {ident} in #{idx} @ {line}")
-                    db.comps_docs.put(ident, obj)
-
-
-def progress(msg, current):
-    print('{} - {} ({:.1%})'.format(project, msg, current/num_tags))
-
-
-# Main
-
-# Check number of threads arg
-if len(argv) >= 2 and argv[1].isdigit() :
-    cpu = int(argv[1])
-
-    if cpu < 5 :
-        cpu = 5
-
-# Distribute threads among functions using the following rules :
-# There are more (or equal) refs threads than others
-# There are more (or equal) defs threads than docs or comps threads
-# Example : if cpu=6 : defs=1, refs=2, docs=1, comps=1, comps_docs=1
-#           if cpu=7 : defs=2, refs=2, docs=1, comps=1, comps_docs=1
-#           if cpu=8 : defs=2, refs=3, docs=1, comps=1, comps_docs=1
-#           if cpu=11: defs=2, refs=3, docs=2, comps=2, comps_docs=2
-quo, rem = divmod(cpu, 5)
-num_th_refs = quo
-num_th_defs = quo
-num_th_docs = quo
-
-# If DT bindings support is enabled, use $quo threads for each of the 2 threads
-# Otherwise add them to the remaining threads
-if dts_comp_support:
-    num_th_comps = quo
-    num_th_comps_docs = quo
-else :
-    num_th_comps = 0
-    num_th_comps_docs = 0
-    rem += 2*quo
-
-quo, rem = divmod(rem, 2)
-num_th_defs += quo
-num_th_refs += quo + rem
-
-tag_buf = []
-for tag in scriptLines('list-tags'):
-    if not db.vers.exists(tag):
-        tag_buf.append(tag)
-
-num_tags = len(tag_buf)
-project = lib.currentProject()
-
-print(project + ' - found ' + str(num_tags) + ' new tags')
-
-if not num_tags:
-    exit(0)
-
-threads_list.append(UpdateIds(tag_buf))
-threads_list.append(UpdateVersions(tag_buf))
-
-# Define defs threads
-for i in range(num_th_defs):
-    threads_list.append(UpdateDefs(i, num_th_defs))
-# Define refs threads
-for i in range(num_th_refs):
-    threads_list.append(UpdateRefs(i, num_th_refs))
-# Define docs threads
-for i in range(num_th_docs):
-    threads_list.append(UpdateDocs(i, num_th_docs))
-# Define comps threads
-for i in range(num_th_comps):
-    threads_list.append(UpdateComps(i, num_th_comps))
-# Define comps_docs threads
-for i in range(num_th_comps_docs):
-    threads_list.append(UpdateCompsDocs(i, num_th_comps_docs))
-
-
-# Start to process tags
-threads_list[0].start()
-
-# Wait until the first tag is ready
-with tag_ready:
-    tag_ready.wait()
-
-# Start remaining threads
-for i in range(1, len(threads_list)):
-    threads_list[i].start()
-
-# Make sure all threads finished
-for i in range(len(threads_list)):
-    threads_list[i].join()
diff --git a/utils/index-all-repositories b/utils/index-all-repositories
index eaf2fafa..81880237 100755
--- a/utils/index-all-repositories
+++ b/utils/index-all-repositories
@@ -48,8 +48,7 @@ index() {
         git fetch --tags other2
     fi
 
-    cd $ELIXIR_INSTALL
-    ./update.py $ELIXIR_THREADS
+    cd $ELIXIR_INSTALL && python3 -m elixir.update
 
     # The above can take so much time on the first run that it's worth running a new update
 
@@ -59,7 +58,7 @@ index() {
 
     echo "$project: 2nd indexing pass..."
     cd $ELIXIR_INSTALL
-    ./update.py $ELIXIR_THREADS
+    python3 -m elixir.update
 }
 
 export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0")))
diff --git a/utils/index-repository b/utils/index-repository
index 82314068..fa7169c5 100755
--- a/utils/index-repository
+++ b/utils/index-repository
@@ -26,4 +26,4 @@ done
 
 export LXR_REPO_DIR=$dir/repo
 export LXR_DATA_DIR=$dir/data
-python3 /usr/local/elixir/update.py $ELIXIR_THREADS
+cd /usr/local/elixir/ && python3 -m elixir.update
diff --git a/utils/update-elixir-data b/utils/update-elixir-data
index de948ec4..e5f7576e 100755
--- a/utils/update-elixir-data
+++ b/utils/update-elixir-data
@@ -34,5 +34,5 @@ for dir_name in $LXR_PROJ_DIR/*; do
     git fetch --all --tags
 
     cd $ELIXIR_INSTALL
-    ./update.py $ELIXIR_THREADS
+    python3 -m elixir.update
 done