From 178226a7fe2e4c07b5caf657876db54bfb20ac9a Mon Sep 17 00:00:00 2001 From: Franciszek Stachura Date: Sun, 29 Dec 2024 21:22:22 +0100 Subject: [PATCH] update: Remove old update script --- update.py | 624 ----------------------------------- utils/index-all-repositories | 5 +- utils/index-repository | 2 +- utils/update-elixir-data | 2 +- 4 files changed, 4 insertions(+), 629 deletions(-) delete mode 100755 update.py diff --git a/update.py b/update.py deleted file mode 100755 index 79cb4dcf..00000000 --- a/update.py +++ /dev/null @@ -1,624 +0,0 @@ -#!/usr/bin/env python3 - -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2017--2020 Mikaël Bouillot -# Maxime Chretien -# and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see <http://www.gnu.org/licenses/>. - -# Throughout, an "idx" is the sequential number associated with a blob. -# This is different from that blob's Git hash. 
- -from sys import argv -from threading import Thread, Lock, Event, Condition - -import elixir.lib as lib -from elixir.lib import script, scriptLines -import elixir.data as data -from elixir.data import PathList -from find_compatible_dts import FindCompatibleDTS - -verbose = False - -dts_comp_support = int(script('dts-comp')) - -compatibles_parser = FindCompatibleDTS() - -db = data.DB(lib.getDataDir(), readonly=False, shared=True, dtscomp=dts_comp_support) - -# Number of cpu threads (+2 for version indexing) -cpu = 10 -threads_list = [] - -hash_file_lock = Lock() # Lock for db.hash and db.file -blobs_lock = Lock() # Lock for db.blobs -defs_lock = Lock() # Lock for db.defs -refs_lock = Lock() # Lock for db.refs -docs_lock = Lock() # Lock for db.docs -comps_lock = Lock() # Lock for db.comps -comps_docs_lock = Lock() # Lock for db.comps_docs -tag_ready = Condition() # Waiting for new tags - -new_idxes = [] # (new idxes, Event idxes ready, Event defs ready, Event comps ready, Event vers ready) -bindings_idxes = [] # DT bindings documentation files -idx_key_mod = 1000000 -defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key. 
- -tags_done = False # True if all tags have been added to new_idxes - -# Progress variables [tags, finished threads] -tags_defs = [0, 0] -tags_defs_lock = Lock() -tags_refs = [0, 0] -tags_refs_lock = Lock() -tags_docs = [0, 0] -tags_docs_lock = Lock() -tags_comps = [0, 0] -tags_comps_lock = Lock() -tags_comps_docs = [0, 0] -tags_comps_docs_lock = Lock() - -class UpdateIds(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateIdsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tags_done, tag_ready - self.index = 0 - - for tag in self.tag_buf: - - new_idxes.append((self.update_blob_ids(tag), Event(), Event(), Event(), Event())) - - progress('ids: ' + tag.decode() + ': ' + str(len(new_idxes[self.index][0])) + - ' new blobs', self.index+1) - - new_idxes[self.index][1].set() # Tell that the tag is ready - - self.index += 1 - - # Wake up waiting threads - with tag_ready: - tag_ready.notify_all() - - tags_done = True - progress('ids: Thread finished', self.index) - - def update_blob_ids(self, tag): - - global hash_file_lock, blobs_lock - - if db.vars.exists('numBlobs'): - idx = db.vars.get('numBlobs') - else: - idx = 0 - - # Get blob hashes and associated file names (without path) - blobs = scriptLines('list-blobs', '-f', tag) - - new_idxes = [] - for blob in blobs: - hash, filename = blob.split(b' ',maxsplit=1) - with blobs_lock: - blob_exist = db.blob.exists(hash) - if not blob_exist: - db.blob.put(hash, idx) - - if not blob_exist: - with hash_file_lock: - db.hash.put(idx, hash) - db.file.put(idx, filename) - - new_idxes.append(idx) - if verbose: - print(f"New blob #{idx} {hash}:{filename}") - idx += 1 - db.vars.put('numBlobs', idx) - return new_idxes - - -class UpdateVersions(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateVersionsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tag_ready - - index = 0 - - while index < len(self.tag_buf): - if index >= len(new_idxes): - # 
Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - tag = self.tag_buf[index] - - new_idxes[index][1].wait() # Make sure the tag is ready - - self.update_versions(tag) - - new_idxes[index][4].set() # Tell that UpdateVersions processed the tag - - progress('vers: ' + tag.decode() + ' done', index+1) - - index += 1 - - progress('vers: Thread finished', index) - - def update_versions(self, tag): - global blobs_lock - - # Get blob hashes and associated file paths - blobs = scriptLines('list-blobs', '-p', tag) - buf = [] - - for blob in blobs: - hash, path = blob.split(b' ', maxsplit=1) - with blobs_lock: - idx = db.blob.get(hash) - buf.append((idx, path)) - - buf = sorted(buf) - obj = PathList() - for idx, path in buf: - obj.append(idx, path) - - # Store DT bindings documentation files to parse them later - if path[:33] == b'Documentation/devicetree/bindings': - bindings_idxes.append(idx) - - if verbose: - print(f"Tag {tag}: adding #{idx} {path}") - db.vers.put(tag, obj, sync=True) - - -class UpdateDefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of defs threads - - def run(self): - global new_idxes, tags_done, tag_ready, tags_defs, tags_defs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_defs_lock: - tags_defs[0] += 1 - - self.update_definitions(new_idxes[self.index][0]) - - new_idxes[self.index][2].set() # Tell that UpdateDefs processed the tag - - self.index += self.inc - - with tags_defs_lock: - tags_defs[1] += 1 - progress('defs: Thread ' + str(tags_defs[1]) + '/' + str(self.inc) + ' finished', tags_defs[0]) - - - def update_definitions(self, idxes): - global hash_file_lock, defs_lock, tags_defs - - for idx in idxes: - if idx % 1000 == 
0: progress('defs: ' + str(idx), tags_defs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-defs', hash, filename, family) - - with defs_lock: - for l in lines: - ident, type, line = l.split(b' ') - type = type.decode() - line = int(line.decode()) - - defs_idxes[idx*idx_key_mod + line] = ident - - if db.defs.exists(ident): - obj = db.defs.get(ident) - elif lib.isIdent(ident): - obj = data.DefList() - else: - continue - - obj.append(idx, type, line, family) - if verbose: - print(f"def {type} {ident} in #{idx} @ {line}") - db.defs.put(ident, obj) - - -class UpdateRefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateRefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of refs threads - - def run(self): - global new_idxes, tags_done, tags_refs, tags_refs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag - - with tags_refs_lock: - tags_refs[0] += 1 - - self.update_references(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_refs_lock: - tags_refs[1] += 1 - progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0]) - - def update_references(self, idxes): - global hash_file_lock, defs_lock, refs_lock, tags_refs - - for idx in idxes: - if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family == None: continue - - prefix = b'' - # Kconfig values are saved as CONFIG_ - if family == 'K': - prefix = b'CONFIG_' - - tokens = 
scriptLines('tokenize-file', '-b', hash, family) - even = True - line_num = 1 - idents = {} - with defs_lock: - for tok in tokens: - even = not even - if even: - tok = prefix + tok - - if (db.defs.exists(tok) and - not ( (idx*idx_key_mod + line_num) in defs_idxes and - defs_idxes[idx*idx_key_mod + line_num] == tok ) and - (family != 'M' or tok.startswith(b'CONFIG_'))): - # We only index CONFIG_??? in makefiles - if tok in idents: - idents[tok] += ',' + str(line_num) - else: - idents[tok] = str(line_num) - - else: - line_num += tok.count(b'\1') - - with refs_lock: - for ident, lines in idents.items(): - if db.refs.exists(ident): - obj = db.refs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"ref: {ident} in #{idx} @ {lines}") - db.refs.put(ident, obj) - - -class UpdateDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of docs threads - - def run(self): - global new_idxes, tags_done, tags_docs, tags_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_docs_lock: - tags_docs[0] += 1 - - self.update_doc_comments(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_docs_lock: - tags_docs[1] += 1 - progress('docs: Thread ' + str(tags_docs[1]) + '/' + str(self.inc) + ' finished', tags_docs[0]) - - def update_doc_comments(self, idxes): - global hash_file_lock, docs_lock, tags_docs - - for idx in idxes: - if idx % 1000 == 0: progress('docs: ' + str(idx), tags_docs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-docs', hash, filename) - with docs_lock: - for l 
in lines: - ident, line = l.split(b' ') - line = int(line.decode()) - - if db.docs.exists(ident): - obj = db.docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, str(line), family) - if verbose: - print(f"doc: {ident} in #{idx} @ {line}") - db.docs.put(ident, obj) - - -class UpdateComps(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps threads - - def run(self): - global new_idxes, tags_done, tags_comps, tags_comps_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_comps_lock: - tags_comps[0] += 1 - - self.update_compatibles(new_idxes[self.index][0]) - - new_idxes[self.index][3].set() # Tell that UpdateComps processed the tag - - self.index += self.inc - - with tags_comps_lock: - tags_comps[1] += 1 - progress('comps: Thread ' + str(tags_comps[1]) + '/' + str(self.inc) + ' finished', tags_comps[0]) - - def update_compatibles(self, idxes): - global hash_file_lock, comps_lock, tags_comps - - for idx in idxes: - if idx % 1000 == 0: progress('comps: ' + str(idx), tags_comps[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'K', 'M']: continue - - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps = {} - for l in lines: - ident, line = l.split(' ') - - if ident in comps: - comps[ident] += ',' + str(line) - else: - comps[ident] = str(line) - - with comps_lock: - for ident, lines in comps.items(): - if db.comps.exists(ident): - obj = db.comps.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps: {ident} in #{idx} @ {line}") - db.comps.put(ident, obj) - - -class 
UpdateCompsDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps_docs threads - - def run(self): - global new_idxes, tags_done, tags_comps_docs, tags_comps_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][3].wait() # Make sure UpdateComps processed the tag - new_idxes[self.index][4].wait() # Make sure UpdateVersions processed the tag - - with tags_comps_docs_lock: - tags_comps_docs[0] += 1 - - self.update_compatibles_bindings(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_comps_docs_lock: - tags_comps_docs[1] += 1 - progress('comps_docs: Thread ' + str(tags_comps_docs[1]) + '/' + str(self.inc) + ' finished', tags_comps_docs[0]) - - def update_compatibles_bindings(self, idxes): - global hash_file_lock, comps_lock, comps_docs_lock, tags_comps_docs, bindings_idxes - - for idx in idxes: - if idx % 1000 == 0: progress('comps_docs: ' + str(idx), tags_comps_docs[0]) - - if not idx in bindings_idxes: # Parse only bindings doc files - continue - - with hash_file_lock: - hash = db.hash.get(idx) - - family = 'B' - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps_docs = {} - with comps_lock: - for l in lines: - ident, line = l.split(' ') - - if db.comps.exists(ident): - if ident in comps_docs: - comps_docs[ident] += ',' + str(line) - else: - comps_docs[ident] = str(line) - - with comps_docs_lock: - for ident, lines in comps_docs.items(): - if db.comps_docs.exists(ident): - obj = db.comps_docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps_docs: {ident} in #{idx} @ {line}") - db.comps_docs.put(ident, obj) - - -def progress(msg, current): - 
print('{} - {} ({:.1%})'.format(project, msg, current/num_tags)) - - -# Main - -# Check number of threads arg -if len(argv) >= 2 and argv[1].isdigit() : - cpu = int(argv[1]) - - if cpu < 5 : - cpu = 5 - -# Distribute threads among functions using the following rules : -# There are more (or equal) refs threads than others -# There are more (or equal) defs threads than docs or comps threads -# Example : if cpu=6 : defs=1, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=7 : defs=2, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=8 : defs=2, refs=3, docs=1, comps=1, comps_docs=1 -# if cpu=11: defs=2, refs=3, docs=2, comps=2, comps_docs=2 -quo, rem = divmod(cpu, 5) -num_th_refs = quo -num_th_defs = quo -num_th_docs = quo - -# If DT bindings support is enabled, use $quo threads for each of the 2 threads -# Otherwise add them to the remaining threads -if dts_comp_support: - num_th_comps = quo - num_th_comps_docs = quo -else : - num_th_comps = 0 - num_th_comps_docs = 0 - rem += 2*quo - -quo, rem = divmod(rem, 2) -num_th_defs += quo -num_th_refs += quo + rem - -tag_buf = [] -for tag in scriptLines('list-tags'): - if not db.vers.exists(tag): - tag_buf.append(tag) - -num_tags = len(tag_buf) -project = lib.currentProject() - -print(project + ' - found ' + str(num_tags) + ' new tags') - -if not num_tags: - exit(0) - -threads_list.append(UpdateIds(tag_buf)) -threads_list.append(UpdateVersions(tag_buf)) - -# Define defs threads -for i in range(num_th_defs): - threads_list.append(UpdateDefs(i, num_th_defs)) -# Define refs threads -for i in range(num_th_refs): - threads_list.append(UpdateRefs(i, num_th_refs)) -# Define docs threads -for i in range(num_th_docs): - threads_list.append(UpdateDocs(i, num_th_docs)) -# Define comps threads -for i in range(num_th_comps): - threads_list.append(UpdateComps(i, num_th_comps)) -# Define comps_docs threads -for i in range(num_th_comps_docs): - threads_list.append(UpdateCompsDocs(i, num_th_comps_docs)) - - -# Start to process tags 
-threads_list[0].start() - -# Wait until the first tag is ready -with tag_ready: - tag_ready.wait() - -# Start remaining threads -for i in range(1, len(threads_list)): - threads_list[i].start() - -# Make sure all threads finished -for i in range(len(threads_list)): - threads_list[i].join() diff --git a/utils/index-all-repositories b/utils/index-all-repositories index eaf2fafa..81880237 100755 --- a/utils/index-all-repositories +++ b/utils/index-all-repositories @@ -48,8 +48,7 @@ index() { git fetch --tags other2 fi - cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS + python3 -m elixir.update # The above can take so much time on the first run that it's worth running a new update @@ -59,7 +58,7 @@ index() { echo "$project: 2nd indexing pass..." cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS + python3 -m elixir.update } export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0"))) diff --git a/utils/index-repository b/utils/index-repository index 82314068..fa7169c5 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -26,4 +26,4 @@ done export LXR_REPO_DIR=$dir/repo export LXR_DATA_DIR=$dir/data -python3 /usr/local/elixir/update.py $ELIXIR_THREADS +PWD=/usr/local/elixir/ python3 -m elixir.update diff --git a/utils/update-elixir-data b/utils/update-elixir-data index de948ec4..e5f7576e 100755 --- a/utils/update-elixir-data +++ b/utils/update-elixir-data @@ -34,5 +34,5 @@ for dir_name in $LXR_PROJ_DIR/*; do git fetch --all --tags cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS + python3 -m elixir.update done