Skip to content

Commit

Permalink
some further improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
JuliaGeh committed Mar 13, 2019
1 parent ce5f2ee commit b54c994
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 31 deletions.
27 changes: 10 additions & 17 deletions rgt/MotifSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(self, preload_motifs=None, motif_dbs=False):
self.motif_data = MotifData()
self.motif_data.set_custom(preload_motifs)
# add motifs to self.motifs_map (pwm_list = paths of the preload motif directories)
self.load_directory(self.motif_data.repositories_list)
self.load_directory(self.motif_data.pwm_list)
else:
self.motif_data = MotifData(repositories=preload_motifs)
# add motifs to self.motifs_map
Expand Down Expand Up @@ -286,11 +286,11 @@ def read_mtf(self, mtf_filenames):

def load_directory(self, db_list):

for dir in db_list:
for file in os.listdir(dir):
tf_id = os.path.splitext(os.path.basename(file))[0]
name = os.path.splitext(os.path.basename(file))[0]
database = os.path.basename(dir)
for directory in db_list:
for file_name in glob.glob(directory + "/*.pwm"):
tf_id = os.path.splitext(os.path.basename(file_name))[0]
name = tf_id
database = os.path.basename(directory)
version = "0"
gene_names = None
tf_class = None
Expand All @@ -301,7 +301,7 @@ def load_directory(self, db_list):
thresholds = {}

self.add(MotifAnnotation(tf_id, name, database, version, gene_names, tf_class, uniprot_ids, data_source,
tax_group, species, thresholds))
tax_group, species, thresholds))

def read_enrichment(self, enrichment_files, threshold=1):
"""
Expand Down Expand Up @@ -478,21 +478,14 @@ def create_motif_list(self, pseudocounts=1.0, fpr=0.0001):
if os.path.isfile(motif_file_name):
# check whether ma provides the motif matching threshold for the given fpr
# recalculate (and store) it otherwise
if len(ma.thresholds) == 0:
if fpr in ma.thresholds and ma.thresholds[fpr]:
threshold = ma.thresholds[fpr]
else:
pfm = parsers.pfm(str(motif_file_name))
bg = tools.flat_bg(len(pfm)) # total number of "points" to add, not per-row
pssm = tools.log_odds(pfm, bg, pseudocounts, 2)
threshold = tools.threshold_from_p(pssm, bg, fpr)
ma.thresholds[fpr] = threshold
else:
if fpr in ma.thresholds and ma.thresholds[fpr]:
threshold = ma.thresholds[fpr]
else:
pfm = parsers.pfm(str(motif_file_name))
bg = tools.flat_bg(len(pfm)) # total number of "points" to add, not per-row
pssm = tools.log_odds(pfm, bg, pseudocounts, 2)
threshold = tools.threshold_from_p(pssm, bg, fpr)
ma.thresholds[fpr] = threshold

motif_list.append(Motif(motif_file_name, pseudocounts, threshold))

Expand Down
4 changes: 2 additions & 2 deletions rgt/Util.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,11 @@ def get_mtf_list(self):
return self.mtf_list

def set_custom(self, repositories):
self.repositories_list = repositories
self.repositories_list = [os.path.basename(r) for r in repositories]
self.pwm_list = []
self.logo_list = []
self.mtf_list = []
for current_repository in self.repositories_list:
for current_repository in repositories:
self.pwm_list.append(npath(current_repository))
self.logo_list.append("")
self.mtf_list.append("")
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
20 changes: 8 additions & 12 deletions unittest/test_MotifSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,23 +377,19 @@ def test_create_motif_list(self):
class CustomDBTest(unittest.TestCase):
def setUp(self):
# use CustomDB
self.motif_set = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
self.motif_set = MotifSet(preload_motifs=[os.path.join(os.path.dirname(__file__), "TestCustomDB")],
motif_dbs=True)

def test_loading(self):
ms = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
motif_dbs=True)
self.assertEqual(len(ms.motifs_map), 3, msg="loaded wrong number of motifs")
self.assertIsNone(ms.motifs_map["firstMotif_5.0.B"].gene_names, msg="gene_names not None")
self.assertIsNone(ms.motifs_map["secondMotif_5.0.B"].data_source, msg="data_source not None")
self.assertEqual(len(ms.motifs_map["thirdMotif_5.0.B"].thresholds), 0, msg="thresholds is not an empty dict")
self.assertEqual(len(self.motif_set.motifs_map), 3, msg="loaded wrong number of motifs")
self.assertIsNone(self.motif_set.motifs_map["firstMotif_5.0.B"].gene_names, msg="gene_names not None")
self.assertIsNone(self.motif_set.motifs_map["secondMotif_5.0.B"].data_source, msg="data_source not None")
self.assertEqual(len(self.motif_set.motifs_map["thirdMotif_5.0.B"].thresholds), 0, msg="thresholds is not an empty dict")

def test_built_in_functions(self):
ms = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
motif_dbs=True)
self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
self.assertTrue(repr(ms) == str(ms), msg="MotifSet: repr does not equal str")
ms2 = ms.filter({'name': ['firstMotif_5.0.B']}, search="exact")
self.assertTrue(str(self.motif_set).startswith("MotifSet:{"), msg="str(ms): wrong format")
self.assertTrue(repr(self.motif_set) == str(self.motif_set), msg="MotifSet: repr does not equal str")
ms2 = self.motif_set.filter({'name': ['firstMotif_5.0.B']}, search="exact")
self.assertTrue("'name': 'firstMotif_5.0.B'" in str(ms2), msg="str(ms2): wrong MotifMap")
self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")
ma = ms2.__getitem__("firstMotif_5.0.B")
Expand Down

0 comments on commit b54c994

Please sign in to comment.