diff --git a/.gitignore b/.gitignore index 709a08c..de43841 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __pycache__/ # Distribution / packaging .Python build/ +data/ develop-eggs/ dist/ downloads/ diff --git a/biofilter_modules/biofilter_class.py b/biofilter_modules/biofilter_class.py index 3f72de3..971ef20 100644 --- a/biofilter_modules/biofilter_class.py +++ b/biofilter_modules/biofilter_class.py @@ -104,7 +104,7 @@ class Biofilter( def getVersionTuple(cls): # tuple = (major,minor,revision,dev,build,date) # dev must be in ('a','b','rc','release') for lexicographic comparison - return (2, 4, 4, "release", "", "2024-12-01") + return (3, 0, 1, "release", "", "2025-01-01") @classmethod def getVersionString(cls): diff --git a/loki_modules/__init__.py b/loki_modules/__init__.py index 87d17d0..acdef18 100644 --- a/loki_modules/__init__.py +++ b/loki_modules/__init__.py @@ -3,7 +3,8 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) -from loki_modules.loki_db import Database +from loki_modules.loki_db import Database # noqa E402 +from loki_modules.loki_build import main # noqa E402 from loki_modules.loki_mixins import ( # noqa E402 Schema, VersionMixin, @@ -13,9 +14,17 @@ DatabaseOperationsMixin, DatabaseLiftOverMixin, DatabaseQueryMixin, + SourceUtilityMethods, + SourceDbOperations, + UpdaterDownloadMixin, + UpdaterDatabaseMixin, + UpdaterLiftOverMixin, + UpdaterOperationsMixin, ) + __all__ = [ + "main", "Database", "loki_db", "loki_source", @@ -30,4 +39,10 @@ "DatabaseOperationsMixin", "DatabaseLiftOverMixin", "DatabaseQueryMixin", + "SourceUtilityMethods", + "SourceDbOperations", + "UpdaterDownloadMixin", + "UpdaterDatabaseMixin", + "UpdaterLiftOverMixin", + "UpdaterOperationsMixin", ] diff --git a/loki_modules/loaders/loki_source_biogrid.py b/loki_modules/loaders/loki_source_biogrid.py index 75c9177..9a19f5d 100644 --- a/loki_modules/loaders/loki_source_biogrid.py +++ b/loki_modules/loaders/loki_source_biogrid.py @@ -6,7 +6,7 @@ class Source_biogrid(loki_source.Source): @classmethod def getVersionString(cls): - return "2.1 (2022-04-13)" + return "3.0.0 (2025-01-01)" def download(self, options, path): # download the latest source files diff --git a/loki_modules/loaders/loki_source_chainfiles.py b/loki_modules/loaders/loki_source_chainfiles.py index 36071c7..17c27f5 100644 --- a/loki_modules/loaders/loki_source_chainfiles.py +++ b/loki_modules/loaders/loki_source_chainfiles.py @@ -26,7 +26,7 @@ class Source_chainfiles(loki_source.Source): @classmethod def getVersionString(cls): - return "2.2 (2014-06-27)" + return "3.0.0 (2025-01-01)" def download(self, options, path): # define callback to search for all available hgX liftover chain files diff --git a/loki_modules/loaders/loki_source_dbsnp.py b/loki_modules/loaders/loki_source_dbsnp.py index af9c4e0..38c52d6 100644 --- a/loki_modules/loaders/loki_source_dbsnp.py +++ b/loki_modules/loaders/loki_source_dbsnp.py @@ -52,7 +52,7 @@ def _identifyLatestSNPContig(self, filenames): @classmethod def getVersionString(cls): - return "2.3 (2018-11-01)" + return "3.0.0 (2025-01-01)" @classmethod def getOptions(cls): @@ -132,7 +132,7 @@ def remFilesCallback(ftp, path): string = urlpath.read().decode("utf-8") onlyfiles = list( set( - re.findall(r"b([0-9]+)_SNPContigLocusId_(.*)\.bcp\.gz", string) + re.findall(r"b([0-9]+)_SNPContigLocusId_(.*)\.bcp\.gz", string) # noqa E501 ) # noqa E501 ) bestfile = self._identifyLatestSNPContig(onlyfiles) @@ -247,10 +247,10 @@ def update(self, options, path): name = words[1] desc = words[2] coding = ( - int(words[5]) if (len(words) > 5 and words[5] != "") else None + int(words[5]) if (len(words) > 5 and words[5] != "") else None # noqa E501 ) # noqa E501 exon = ( - int(words[6]) if (len(words) > 6 and words[6] != "") else None + int(words[6]) if (len(words) > 6 and words[6] != "") else None # noqa E501 ) # noqa E501 roleID[code] = self.addRole(name, desc, coding, exon) @@ -261,38 +261,38 @@ def update(self, options, path): # process SNP roles """ /* from human_9606_table.sql.gz */ -CREATE TABLE [b137_SNPContigLocusId] -( -[snp_id] [int] NULL , -[contig_acc] [varchar](32) NOT NULL , -[contig_ver] [tinyint] NULL , -[asn_from] [int] NULL , -[asn_to] [int] NULL , -[locus_id] [int] NULL , -[locus_symbol] [varchar](64) NULL , -[mrna_acc] [varchar](32) NOT NULL , -[mrna_ver] [smallint] NOT NULL , -[protein_acc] [varchar](32) NULL , -[protein_ver] [smallint] NULL , -[fxn_class] [int] NULL , -[reading_frame] [int] NULL , -[allele] [varchar](255) NULL , -[residue] [varchar](1000) NULL , -[aa_position] [int] NULL , -[build_id] [varchar](4) NOT NULL , -[ctg_id] [int] NULL , -[mrna_start] [int] NULL , -[mrna_stop] [int] NULL , -[codon] [varchar](1000) NULL , -[protRes] [char](3) NULL , -[contig_gi] [int] NULL , -[mrna_gi] [int] NULL , -[mrna_orien] [tinyint] NULL , -[cp_mrna_ver] [int] NULL , -[cp_mrna_gi] [int] NULL , -[verComp] [int] NULL -) -""" + CREATE TABLE [b137_SNPContigLocusId] + ( + [snp_id] [int] NULL , + [contig_acc] [varchar](32) NOT NULL , + [contig_ver] [tinyint] NULL , + [asn_from] [int] NULL , + [asn_to] [int] NULL , + [locus_id] [int] NULL , + [locus_symbol] [varchar](64) NULL , + [mrna_acc] [varchar](32) NOT NULL , + [mrna_ver] [smallint] NOT NULL , + [protein_acc] [varchar](32) NULL , + [protein_ver] [smallint] NULL , + [fxn_class] [int] NULL , + [reading_frame] [int] NULL , + [allele] [varchar](255) NULL , + [residue] [varchar](1000) NULL , + [aa_position] [int] NULL , + [build_id] [varchar](4) NOT NULL , + [ctg_id] [int] NULL , + [mrna_start] [int] NULL , + [mrna_stop] [int] NULL , + [codon] [varchar](1000) NULL , + [protRes] [char](3) NULL , + [contig_gi] [int] NULL , + [mrna_gi] [int] NULL , + [mrna_orien] [tinyint] NULL , + [cp_mrna_ver] [int] NULL , + [cp_mrna_gi] [int] NULL , + [verComp] [int] NULL + ) + """ self.log("processing SNP roles ...\n") setRole = set() numRole = numOrphan = numInc = 0 @@ -502,7 +502,7 @@ def processChmSNPs( for chm, listPos in listChrPos.items(): self.addChromosomeSNPLoci(self._loki.chr_num[chm], listPos) self.log( - "writing chromosome %s SNPs to the database completed\n" % fileChm + "writing chromosome %s SNPs to the database completed\n" % fileChm # noqa E501 ) # noqa E501 # print results @@ -527,6 +527,6 @@ def processChmSNPs( ) if setBadChr: self.log( - "WARNING: %d SNPs on mismatching chromosome\n" % (len(setBadChr)) + "WARNING: %d SNPs on mismatching chromosome\n" % (len(setBadChr)) # noqa E501 ) # noqa E501 listChrPos = setBadBuild = setBadVers = setBadFilter = setBadChr = None diff --git a/loki_modules/loaders/loki_source_entrez.py b/loki_modules/loaders/loki_source_entrez.py index 97224a8..b15e7f4 100644 --- a/loki_modules/loaders/loki_source_entrez.py +++ b/loki_modules/loaders/loki_source_entrez.py @@ -7,7 +7,7 @@ class Source_entrez(loki_source.Source): @classmethod def getVersionString(cls): - return "2.4 (2022-04-12)" + return "3.0.0 (2025-01-01)" @classmethod def getOptions(cls): @@ -378,9 +378,9 @@ def update(self, options, path): % (len(setBadChr)) # noqa E501 ) self.logPop() - entrezChm = setOrphan = setBadNC = setBadBuild = setBadChr = setBadVers = ( + entrezChm = setOrphan = setBadNC = setBadBuild = setBadChr = setBadVers = ( # noqa E501 buildGenes - ) = None # noqa E501 # noqa E501 + ) = None # noqa E501 # store gene regions self.log("writing gene regions to the database ...\n") diff --git a/loki_modules/loaders/loki_source_go.py b/loki_modules/loaders/loki_source_go.py index 94da8bc..ba12ff5 100644 --- a/loki_modules/loaders/loki_source_go.py +++ b/loki_modules/loaders/loki_source_go.py @@ -7,7 +7,7 @@ class Source_go(loki_source.Source): @classmethod def getVersionString(cls): - return "2.1 (2022-04-14)" + return "3.0.0 (2025-01-01)" # getVersionString() diff --git a/loki_modules/loaders/loki_source_gwas.py b/loki_modules/loaders/loki_source_gwas.py index b9a02c1..a8cd1ec 100644 --- a/loki_modules/loaders/loki_source_gwas.py +++ b/loki_modules/loaders/loki_source_gwas.py @@ -10,7 +10,7 @@ class Source_gwas(loki_source.Source): @classmethod def getVersionString(cls): - return "2.5 (2016-09-19)" + return "3.0.0 (2025-01-01)" def download(self, options, path): # download the latest source files diff --git a/loki_modules/loaders/loki_source_mint.py b/loki_modules/loaders/loki_source_mint.py index e66803c..1d566eb 100644 --- a/loki_modules/loaders/loki_source_mint.py +++ b/loki_modules/loaders/loki_source_mint.py @@ -34,7 +34,7 @@ def _identifyLatestFilename(self, filenames): @classmethod def getVersionString(cls): - return "2.2 (2018-02-20)" + return "3.0.0 (2025-01-01)" def download(self, options, path): self.downloadFilesFromHTTP( diff --git a/loki_modules/loaders/loki_source_oreganno.py b/loki_modules/loaders/loki_source_oreganno.py index 1aed067..66f2b95 100644 --- a/loki_modules/loaders/loki_source_oreganno.py +++ b/loki_modules/loaders/loki_source_oreganno.py @@ -15,7 +15,7 @@ class Source_oreganno(loki_source.Source): @classmethod def getVersionString(cls): - return "2.1 (2016-09-19)" + return "3.0.0 (2025-01-01)" def download(self, options): """ diff --git a/loki_modules/loaders/loki_source_pfam.py b/loki_modules/loaders/loki_source_pfam.py index 71552e1..138aa65 100644 --- a/loki_modules/loaders/loki_source_pfam.py +++ b/loki_modules/loaders/loki_source_pfam.py @@ -6,7 +6,7 @@ class Source_pfam(loki_source.Source): @classmethod def getVersionString(cls): - return "2.2 (2016-02-08)" + return "3.0.0 (2025-01-01)" def download(self, options, path): self.downloadFilesFromHTTP( diff --git a/loki_modules/loaders/loki_source_pharmgkb.py b/loki_modules/loaders/loki_source_pharmgkb.py index 3b9cce1..9a04dcb 100644 --- a/loki_modules/loaders/loki_source_pharmgkb.py +++ b/loki_modules/loaders/loki_source_pharmgkb.py @@ -6,7 +6,7 @@ class Source_pharmgkb(loki_source.Source): @classmethod def getVersionString(cls): - return "2.3 (2018-10-30)" + return "3.0.0 (2025-01-01)" def download(self, options, path): self.downloadFilesFromHTTPS( diff --git a/loki_modules/loaders/loki_source_reactome.py b/loki_modules/loaders/loki_source_reactome.py index e39f34d..1aeaeb1 100644 --- a/loki_modules/loaders/loki_source_reactome.py +++ b/loki_modules/loaders/loki_source_reactome.py @@ -7,7 +7,7 @@ class Source_reactome(loki_source.Source): @classmethod def getVersionString(cls): - return "2.1 (2015-01-23)" + return "3.0.0 (2025-01-01)" def download(self, options): # download the latest source files diff --git a/loki_modules/loaders/loki_source_ucsc_ecr.py b/loki_modules/loaders/loki_source_ucsc_ecr.py index b4b530b..6044d08 100644 --- a/loki_modules/loaders/loki_source_ucsc_ecr.py +++ b/loki_modules/loaders/loki_source_ucsc_ecr.py @@ -48,7 +48,7 @@ class Source_ucsc_ecr(loki_source.Source): @classmethod def getVersionString(cls): - return "2.0.1 (2013-03-01)" + return "3.0.0 (2025-01-01)" @classmethod def getOptions(cls): diff --git a/loki_modules/loki-build.py b/loki_modules/loki_build.py similarity index 99% rename from loki_modules/loki-build.py rename to loki_modules/loki_build.py index 950b677..0414cc0 100755 --- a/loki_modules/loki-build.py +++ b/loki_modules/loki_build.py @@ -81,7 +81,7 @@ from loki_modules import loki_db -if __name__ == "__main__": +def main(): version = "LOKI version %s" % (loki_db.Database.getVersionString()) # define arguments @@ -442,4 +442,7 @@ def rmtree_error(func, path, exc): db.testDatabaseWriteable() db.optimizeDatabase() # if knowledge -# __main__ + + +if __name__ == "__main__": + main() diff --git a/loki_modules/loki_mixins/source_utility_methods_mixin.py b/loki_modules/loki_mixins/source_utility_methods_mixin.py index cf19790..2168130 100644 --- a/loki_modules/loki_mixins/source_utility_methods_mixin.py +++ b/loki_modules/loki_mixins/source_utility_methods_mixin.py @@ -8,7 +8,7 @@ import urllib.request as urllib2 import zlib import wget -from datetime import datetime +from datetime import datetime, timezone class SourceUtilityMethods: @@ -211,7 +211,7 @@ def downloadFilesFromFTP(self, remHost, remFiles): if os.path.exists(locPath): stat = os.stat(locPath) locSize[locPath] = int(stat.st_size) - locTime[locPath] = datetime.datetime.fromtimestamp( + locTime[locPath] = datetime.fromtimestamp( stat.st_mtime ) # noqa E501 @@ -220,7 +220,7 @@ def downloadFilesFromFTP(self, remHost, remFiles): # format, but most servers return "ls -l"-ish space-delimited columns # (permissions) (?) (user) (group) (size) (month) (day) (year-or-time) # (filename) - now = datetime.datetime.utcnow() + now = datetime.now(timezone.utc) def ftpDirCB(rem_dir, line): words = line.split() @@ -229,15 +229,15 @@ def ftpDirCB(rem_dir, line): remSize[remFn] = int(words[4]) timestamp = " ".join(words[5:8]) try: - time = datetime.datetime.strptime(timestamp, "%b %d %Y") + time = datetime.strptime(timestamp, "%b %d %Y") except ValueError: try: - time = datetime.datetime.strptime( + time = datetime.strptime( "%s %d" % (timestamp, now.year), "%b %d %H:%M %Y" ) except ValueError: try: - time = datetime.datetime.strptime( + time = datetime.strptime( "%s %d" % (timestamp, now.year - 1), "%b %d %H:%M %Y", # noqa E501 ) @@ -342,7 +342,7 @@ def _downloadHTTP( if os.path.exists(locPath): stat = os.stat(locPath) locSize[locPath] = int(stat.st_size) - locTime[locPath] = datetime.datetime.fromtimestamp( + locTime[locPath] = datetime.fromtimestamp( stat.st_mtime ) # noqa E501 # check remote file sizes and times @@ -366,11 +366,11 @@ def _downloadHTTP( last_modified = info.get("last-modified") if last_modified: try: - remTime[locPath] = datetime.datetime.strptime( + remTime[locPath] = datetime.strptime( last_modified, "%a, %d %b %Y %H:%M:%S %Z" ) except ValueError: - remTime[locPath] = datetime.datetime.utcnow() + remTime[locPath] = datetime.now(timezone.utc) response.close() self.log(" OK\n") diff --git a/loki_modules/loki_mixins/updater_database.py b/loki_modules/loki_mixins/updater_database.py index b25bc90..1f09f59 100644 --- a/loki_modules/loki_mixins/updater_database.py +++ b/loki_modules/loki_mixins/updater_database.py @@ -80,8 +80,12 @@ def updateDatabase( # temp for now but should replace options everywhere below self._sourceOptions[srcName] = options + # ---------------------------------------------- + # Start Parallel Download and Hashing + downloadAndHashThreads = {} srcSetsToDownload = sorted(srcSet) + # Create buffer to run in parallel for srcName in srcSetsToDownload: # download files into a local cache if not cacheOnly: @@ -95,10 +99,14 @@ def updateDatabase( ) downloadAndHashThreads[srcName].start() + # Wait for all download and hash threads to finish for srcName in downloadAndHashThreads.keys(): downloadAndHashThreads[srcName].join() self.log(srcName + " rejoined main thread\n") + # Return the flow of the code to the main thread + # ---------------------------------------------- + for srcName in srcSetsToDownload: srcObj = self._sourceObjects[srcName] srcID = srcObj.getSourceID() diff --git a/tests/issues/b15_biofilter_group_annotation/data-out/outcome_run_2.configuration b/tests/issues/b15_biofilter_group_annotation/data-out/outcome_run_2.configuration new file mode 100644 index 0000000..e69de29 diff --git a/tests/issues/b15_biofilter_group_annotation/data-out/outcome_run_2.gene-group-source b/tests/issues/b15_biofilter_group_annotation/data-out/outcome_run_2.gene-group-source new file mode 100644 index 0000000..e69de29 diff --git a/tests/issues/l16_build_37_loki/data-out/outcomes.configuration b/tests/issues/l16_build_37_loki/data-out/outcomes.configuration new file mode 100644 index 0000000..e69de29 diff --git a/tests/issues/l16_build_37_loki/data-out/outcomes.position_label.snp-position-gene-upstream-downstream b/tests/issues/l16_build_37_loki/data-out/outcomes.position_label.snp-position-gene-upstream-downstream new file mode 100644 index 0000000..e69de29 diff --git a/tests/units/data/temp_config.txt b/tests/units/data/temp_config.txt index ed01fe8..418a020 100644 --- a/tests/units/data/temp_config.txt +++ b/tests/units/data/temp_config.txt @@ -1,4 +1,4 @@ # This is a comment option1 value1 option2 value2 -include /Users/andrerico/Works/Sys/biofilter/tests/units/data/another_config.txt +include /Users/andrerico/Works/Sys/biofilter/tests/units/data/temp_config.txt