-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
40 changed files
with
17,738 additions
and
8,488 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
- Close the 2.4.4 version | ||
- Merged development branch to Main branch | ||
- Change development branch to 3.0.1 version | ||
- Comparação entre o Schema do Biofilter 2.4.4 e 3.0.0 (não vi diferenças) |
101 changes: 101 additions & 0 deletions
101
loki_modules/loaders/loaders_unsupported/loki_source_disgenet.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env python | ||
|
||
import collections | ||
import re | ||
import apsw | ||
from sh import gunzip | ||
from loki import loki_source | ||
|
||
|
||
class Source_disgenet(loki_source.Source):
    """Loader that imports DisGeNET diseases and disease-gene pairs.

    The data is read from the DisGeNET SQLite dump ``disgenet_2020.db``:
    each disease becomes a typed group (type ``disease``) whose subtype is
    its DisGeNET disease class (or ``-`` when it has none), and each
    gene-disease association becomes a group member identified by its
    ``entrez_gid`` name.
    """

    @classmethod
    def getVersionString(cls):
        """Return the version string of this loader."""
        return '1.0 (2023-08-08)'
    #getVersionString()

    def download(self, options):
        """Fetch the gzip-compressed DisGeNET SQLite dump from disgenet.org.

        ``options`` is accepted for interface compatibility and is not used.
        """
        # download the latest source files
        self.downloadFilesFromHTTP('disgenet.org', {
            'disgenet_2020.db.gz': '/static/disgenet_ap1/files/sqlite_downloads/current/disgenet_2020.db.gz',
        })
    #download()

    def update(self, options):
        """Replace this source's records with the content of the DisGeNET dump.

        Steps: delete the old records, register namespaces/types/subtypes,
        decompress and query ``disgenet_2020.db``, store the diseases and
        their names, then store the disease-gene pairs.

        ``options`` is accepted for interface compatibility and is not used.
        """
        # clear out all old data from this source
        self.log("deleting old records from the database ...")
        self.deleteAll()
        self.log(" OK\n")

        # get or create the required metadata records
        namespaceID = self.addNamespaces([
            ('disgenet_id', 0),
            ('entrez_gid', 0),
            ('disease', 0),
        ])
        typeID = self.addTypes([
            ('disease',),
            ('gene',),
        ])
        subtypeID = self.addSubtypes([
            ('-',),
        ])

        # process disgenet sqlite file
        self.log("processing diseases ...")
        # gunzip refuses to overwrite an existing output file; -f keeps a
        # disgenet_2020.db left over from an earlier run from aborting the update
        gunzip('-f', 'disgenet_2020.db.gz')
        con = apsw.Connection('disgenet_2020.db')
        try:
            cur = con.cursor()

            # disease class NID -> class name
            cur.execute('select diseaseClassNID,diseaseClassName from diseaseClass')
            diseaseClass = {row[0]: row[1].strip() for row in cur.fetchall()}

            # disease ID -> [disease name, class NID or None]; a disease that
            # appears in several rows keeps the values of the last row returned
            cur.execute('SELECT a.diseaseId,a.diseaseName,b.diseaseClassNID FROM diseaseAttributes a LEFT JOIN disease2class b ON a.diseaseNID=b.diseaseNID order by a.diseaseNID')
            diseases = {row[0]: [row[1], row[2]] for row in cur.fetchall()}
            self.log(" OK: %d disease\n" % (len(diseases),))

            # store diseases
            self.log("writing diseases to the database ...")
            listSubtype = self.addSubtypes([(val,) for val in set(diseaseClass.values())])
            # fixed ordering shared by the rows sent and the IDs returned
            listGroup = list(diseases)

            def groupRows():
                # one (subtype, disease name, None) tuple per disease, in listGroup order
                for diseaseID in listGroup:
                    name, classNID = diseases[diseaseID]
                    if classNID is None:
                        subtype = subtypeID['-']
                    else:
                        subtype = listSubtype[diseaseClass[classNID]]
                    yield (subtype, name, None)

            listAID = self.addTypedGroups(typeID['disease'], groupRows())
            groupAID = dict(zip(listGroup, listAID))
            self.log(" OK\n")

            # store diseases names
            self.log("writing diseases names to the database ...")
            self.addGroupNamespacedNames(namespaceID['disgenet_id'], ((groupAID[diseaseID], diseaseID) for diseaseID in listGroup))
            self.addGroupNamespacedNames(namespaceID['disease'], ((groupAID[diseaseID], diseases[diseaseID][0]) for diseaseID in listGroup))
            # release the lookup tables before loading the association rows
            diseases = None
            diseaseClass = None
            self.log(" OK\n")

            # process disgenet disease identifiers
            self.log("processing diseases identifiers ...")
            cur.execute('SELECT b.geneId,c.diseaseId FROM geneDiseaseNetwork a LEFT JOIN geneAttributes b ON a.geneNID=b.geneNID LEFT JOIN diseaseAttributes c ON a.diseaseNID=c.diseaseNID ORDER BY c.diseaseId')
            diseaseGeneResult = cur.fetchall()
        finally:
            # always release the SQLite handle, even if a query or write failed
            con.close()

        diseaseGene = set()
        numAssoc = 0
        for geneID, diseaseID in diseaseGeneResult:
            # ignore associations whose disease was not stored above
            if diseaseID in groupAID:
                numAssoc += 1
                # numAssoc gives every association its own member number
                diseaseGene.add((groupAID[diseaseID], numAssoc, geneID))
        self.log(" OK: %d diseases and gene pairs\n" % (len(diseaseGene),))

        # store disgenet disease-gene pairs
        self.log("writing diseases and gene pairs to the database ...")
        self.addGroupMemberTypedNamespacedNames(typeID['gene'], namespaceID['entrez_gid'], diseaseGene)
        diseaseGene = None
        self.log(" OK\n")
    #update()

#Source_disgenet
148 changes: 148 additions & 0 deletions
148
loki_modules/loaders/loaders_unsupported/loki_source_gaad.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
#!/usr/bin/env python | ||
|
||
import collections | ||
import re | ||
from loki import loki_source | ||
|
||
|
||
class Source_gaad(loki_source.Source):
    """Loader that imports GAAD diseases, disease relationships and genes.

    Diseases are read from ``diseases2.txt.gz`` and stored as typed groups
    (type ``disease``, subtype ``-``); disease pairs from
    ``disease_relationships.txt.gz`` are stored as ``disease_co-occurring``
    relationships; disease-gene pairs from three association files are
    stored as group members identified by their ``entrez_gid`` name.
    """

    @classmethod
    def getVersionString(cls):
        """Return the version string of this loader."""
        return '1.0 (2023-06-08)'
    #getVersionString()

    def download(self, options):
        """Fetch the GAAD download files from gaad.medgenius.info over HTTPS.

        ``options`` is accepted for interface compatibility and is not used.
        """
        # download the latest source files
        self.downloadFilesFromHTTPS('gaad.medgenius.info', {
            'diseases2.txt.gz': '/Downloads/diseases2.txt.gz',  # disease name by AID
            'disease_relationships.txt.gz': '/Downloads/disease_relationships.txt.gz',
            'disease_association_database_annotations_uniprot_ncbiGene.txt.gz': '/Downloads/disease_association_database_annotations_uniprot_ncbiGene.txt.gz',
            'disease_association_genecards.txt.gz': '/Downloads/disease_association_genecards.txt.gz',
            'disease_gene_association_pubmed_textmining_zhao.txt.gz': '/Downloads/disease_gene_association_pubmed_textmining_zhao.txt.gz',
        })
    #download()

    def update(self, options):
        """Replace this source's records with the content of the GAAD files.

        Steps: delete the old records, register the metadata records, store
        the diseases and their names, store the disease relationships, then
        store the disease-gene pairs. Rows that refer to a disease ID absent
        from ``diseases2.txt.gz`` are skipped and counted in the log.

        ``options`` is accepted for interface compatibility and is not used.
        """
        # clear out all old data from this source
        self.log("deleting old records from the database ...")
        self.deleteAll()
        self.log(" OK\n")

        # get or create the required metadata records
        namespaceID = self.addNamespaces([
            ('gaad_id', 0),
            ('entrez_gid', 0),
            ('disease', 0),
        ])
        relationshipID = self.addRelationships([
            ('disease_co-occurring',),
        ])
        typeID = self.addTypes([
            ('disease',),
            ('gene',),
        ])
        subtypeID = self.addSubtypes([
            ('-',),
        ])

        # process gaad disease
        self.log("processing diseases ...")
        diseases = {}
        for line in self.zfile('diseases2.txt.gz'):
            # only rows whose first column is a disease ID (AID...) are data
            if not line.startswith("AID"):
                continue
            words = line.split("\t")
            # store disease name of each disease ID (AID)
            diseases[words[0]] = words[1].rstrip()
        #foreach line in diseaseFile
        self.log(" OK: %d disease\n" % (len(diseases),))

        # store diseases
        self.log("writing diseases to the database ...")
        # fixed ordering shared by the rows sent and the IDs returned
        listGroup = list(diseases)
        listAID = self.addTypedGroups(typeID['disease'], ((subtypeID['-'], group, diseases[group]) for group in listGroup))
        groupAID = dict(zip(listGroup, listAID))
        self.log(" OK\n")

        # store diseases names
        self.log("writing diseases names to the database ...")
        self.addGroupNamespacedNames(namespaceID['gaad_id'], ((groupAID[group], group) for group in listGroup))
        self.addGroupNamespacedNames(namespaceID['disease'], ((groupAID[group], diseases[group]) for group in listGroup))
        diseases = None
        self.log(" OK\n")

        # process gaad disease relationships
        self.log("processing diseases relationships ...")
        cooccurring = relationshipID['disease_co-occurring']
        relationships = []
        numSkipped = 0
        for line in self.zfile('disease_relationships.txt.gz'):
            # skip the header row
            if line.startswith("disease_uid1"):
                continue
            words = line.split("\t")
            # strip both IDs: the second one may carry the line's trailing newline
            diseaseID = words[0].strip()
            diseaseID2 = words[1].strip()
            if diseaseID not in groupAID or diseaseID2 not in groupAID:
                numSkipped += 1
                continue
            # store disease pairs that share genes, keyed by the IDs returned
            # from addTypedGroups, like every other record written here
            # NOTE(review): assumes addGroupRelationships expects those database
            # group IDs rather than the raw source IDs - confirm against loki_source
            relationships.append((groupAID[diseaseID], groupAID[diseaseID2], cooccurring, None))
        #foreach line in relationshipFile
        self.log(" OK: %d disease relationships\n" % (len(relationships),))
        if numSkipped:
            self.log("WARNING: skipped %d relationships with unknown disease IDs\n" % (numSkipped,))

        # store gaad disease relationships
        self.log("writing diseases relationships to the database ...")
        self.addGroupRelationships(relationships)
        relationships = None
        self.log(" OK\n")

        # process gaad disease identifiers
        self.log("processing diseases identifiers ...")
        # (file name, disease ID column, entrez gene ID column)
        associationFiles = (
            ('disease_association_database_annotations_uniprot_ncbiGene.txt.gz', 0, 1),
            ('disease_association_genecards.txt.gz', 0, 1),
            ('disease_gene_association_pubmed_textmining_zhao.txt.gz', 2, 1),
        )
        diseaseGene = []
        num = 0
        numSkipped = 0
        for fileName, diseaseCol, geneCol in associationFiles:
            for line in self.zfile(fileName):
                # skip the header row
                if line.startswith("disease_"):
                    continue
                words = line.split("\t")
                diseaseID = words[diseaseCol].strip()
                entrezID = words[geneCol].strip()
                # a disease missing from diseases2.txt.gz has no group to attach to
                if diseaseID not in groupAID:
                    numSkipped += 1
                    continue
                # num gives every association its own member number
                num += 1
                diseaseGene.append((groupAID[diseaseID], num, entrezID))
            #foreach line in file
        #foreach association file
        self.log(" OK: %d diseases and gene pairs\n" % (len(diseaseGene),))
        if numSkipped:
            self.log("WARNING: skipped %d gene pairs with unknown disease IDs\n" % (numSkipped,))

        # store gaad disease identifiers
        self.log("writing diseases and gene pairs to the database ...")
        self.addGroupMemberTypedNamespacedNames(typeID['gene'], namespaceID['entrez_gid'], diseaseGene)
        diseaseGene = None
        self.log(" OK\n")
    #update()

#Source_gaad
Oops, something went wrong.