some further improvements

CostaLab · Mar 13, 2019 · b54c994 · b54c994
1 parent ce5f2ee
commit b54c994
Show file tree

Hide file tree

Showing 6 changed files with 20 additions and 31 deletions.
diff --git a/rgt/MotifSet.py b/rgt/MotifSet.py
@@ -86,7 +86,7 @@ def __init__(self, preload_motifs=None, motif_dbs=False):
                 self.motif_data = MotifData()
                 self.motif_data.set_custom(preload_motifs)
                 # add motifs to self.motifs_map (repositories_list = preload motifs)
-                self.load_directory(self.motif_data.repositories_list)
+                self.load_directory(self.motif_data.pwm_list)
             else:
                 self.motif_data = MotifData(repositories=preload_motifs)
                 # add motifs to self.motifs_map
@@ -286,11 +286,11 @@ def read_mtf(self, mtf_filenames):
 
     def load_directory(self, db_list):
 
-        for dir in db_list:
-            for file in os.listdir(dir):
-                tf_id = os.path.splitext(os.path.basename(file))[0]
-                name = os.path.splitext(os.path.basename(file))[0]
-                database = os.path.basename(dir)
+        for directory in db_list:
+            for file_name in glob.glob(directory + "/*.pwm"):
+                tf_id = os.path.splitext(os.path.basename(file_name))[0]
+                name = tf_id
+                database = os.path.basename(directory)
                 version = "0"
                 gene_names = None
                 tf_class = None
@@ -301,7 +301,7 @@ def load_directory(self, db_list):
                 thresholds = {}
 
                 self.add(MotifAnnotation(tf_id, name, database, version, gene_names, tf_class, uniprot_ids, data_source,
-                                     tax_group, species, thresholds))
+                                         tax_group, species, thresholds))
 
     def read_enrichment(self, enrichment_files, threshold=1):
         """
@@ -478,21 +478,14 @@ def create_motif_list(self, pseudocounts=1.0, fpr=0.0001):
                 if os.path.isfile(motif_file_name):
                     # check whether ma provides the motif matching threshold for the given fpr
                     # recalculate (and store) it otherwise
-                    if len(ma.thresholds) == 0:
+                    if fpr in ma.thresholds and ma.thresholds[fpr]:
+                        threshold = ma.thresholds[fpr]
+                    else:
                         pfm = parsers.pfm(str(motif_file_name))
                         bg = tools.flat_bg(len(pfm))  # total number of "points" to add, not per-row
                         pssm = tools.log_odds(pfm, bg, pseudocounts, 2)
                         threshold = tools.threshold_from_p(pssm, bg, fpr)
                         ma.thresholds[fpr] = threshold
-                    else:
-                        if fpr in ma.thresholds and ma.thresholds[fpr]:
-                            threshold = ma.thresholds[fpr]
-                        else:
-                            pfm = parsers.pfm(str(motif_file_name))
-                            bg = tools.flat_bg(len(pfm))  # total number of "points" to add, not per-row
-                            pssm = tools.log_odds(pfm, bg, pseudocounts, 2)
-                            threshold = tools.threshold_from_p(pssm, bg, fpr)
-                            ma.thresholds[fpr] = threshold
 
                     motif_list.append(Motif(motif_file_name, pseudocounts, threshold))
 

diff --git a/rgt/Util.py b/rgt/Util.py
@@ -196,11 +196,11 @@ def get_mtf_list(self):
         return self.mtf_list
 
     def set_custom(self, repositories):
-        self.repositories_list = repositories
+        self.repositories_list = [os.path.basename(r) for r in repositories]
         self.pwm_list = []
         self.logo_list = []
         self.mtf_list = []
-        for current_repository in self.repositories_list:
+        for current_repository in repositories:
             self.pwm_list.append(npath(current_repository))
             self.logo_list.append("")
             self.mtf_list.append("")

diff --git a/unittest/motifanalysis/TestCustomDB/firstMotif_5.0.B.pwm b/unittest/TestCustomDB/firstMotif_5.0.B.pwm
diff --git a/unittest/motifanalysis/TestCustomDB/secondMotif_5.0.B.pwm b/unittest/TestCustomDB/secondMotif_5.0.B.pwm
diff --git a/unittest/motifanalysis/TestCustomDB/thirdMotif_5.0.B.pwm b/unittest/TestCustomDB/thirdMotif_5.0.B.pwm
diff --git a/unittest/test_MotifSet.py b/unittest/test_MotifSet.py
@@ -377,23 +377,19 @@ def test_create_motif_list(self):
 class CustomDBTest(unittest.TestCase):
     def setUp(self):
         # use CustomDB
-        self.motif_set = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
+        self.motif_set = MotifSet(preload_motifs=[os.path.join(os.path.dirname(__file__), "TestCustomDB")],
                                   motif_dbs=True)
 
     def test_loading(self):
-        ms = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
-                      motif_dbs=True)
-        self.assertEqual(len(ms.motifs_map), 3, msg="loaded wrong number of motifs")
-        self.assertIsNone(ms.motifs_map["firstMotif_5.0.B"].gene_names, msg="gene_names not None")
-        self.assertIsNone(ms.motifs_map["secondMotif_5.0.B"].data_source, msg="data_source not None")
-        self.assertEqual(len(ms.motifs_map["thirdMotif_5.0.B"].thresholds), 0, msg="thresholds is not an empty dict")
+        self.assertEqual(len(self.motif_set.motifs_map), 3, msg="loaded wrong number of motifs")
+        self.assertIsNone(self.motif_set.motifs_map["firstMotif_5.0.B"].gene_names, msg="gene_names not None")
+        self.assertIsNone(self.motif_set.motifs_map["secondMotif_5.0.B"].data_source, msg="data_source not None")
+        self.assertEqual(len(self.motif_set.motifs_map["thirdMotif_5.0.B"].thresholds), 0, msg="thresholds is not an empty dict")
 
     def test_built_in_functions(self):
-        ms = MotifSet(preload_motifs=["/home/julia/reg-gen/unittest/motifanalysis/TestCustomDB"],
-                      motif_dbs=True)
-        self.assertTrue(str(ms).startswith("MotifSet:{"), msg="str(ms): wrong format")
-        self.assertTrue(repr(ms) == str(ms), msg="MotifSet: repr does not equal str")
-        ms2 = ms.filter({'name': ['firstMotif_5.0.B']}, search="exact")
+        self.assertTrue(str(self.motif_set).startswith("MotifSet:{"), msg="str(ms): wrong format")
+        self.assertTrue(repr(self.motif_set) == str(self.motif_set), msg="MotifSet: repr does not equal str")
+        ms2 = self.motif_set.filter({'name': ['firstMotif_5.0.B']}, search="exact")
         self.assertTrue("'name': 'firstMotif_5.0.B'" in str(ms2), msg="str(ms2): wrong MotifMap")
         self.assertTrue(str(ms2).startswith("MotifSet:{"), msg="str(ms2): wrong format")
         ma = ms2.__getitem__("firstMotif_5.0.B")