more python fixes and conversions

CostaLab · Oct 29, 2019 · 220d50c · 220d50c
1 parent cf879dd
commit 220d50c
Show file tree

Hide file tree

Showing 10 changed files with 65 additions and 82 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -6,11 +6,7 @@ matrix:
     - os: linux
       dist: xenial
       language: python
-      python: "2.7"
-    - os: linux
-      dist: xenial
-      language: python
-      python: "3.7"
+      python: "3.6"
     - os: osx
       language: generic
       addons:
@@ -19,14 +15,14 @@ matrix:
             - llvm
             - boost
   allow_failures:
-    - python: "3.7"
+    - python: "3.6"
 before_install:
-  - pip install --upgrade pip
+  - pip3 install --upgrade pip
 install:
-  - pip install cython
-  - pip install numpy scipy
-  - pip install coveralls
-  - pip install .
+  - pip3 install cython
+  - pip3 install numpy scipy
+  - pip3 install coveralls
+  - pip3 install .
 # command to run tests
 script:
   - coverage run --source ./ -m unittest discover unittest/ -p "*" -v

diff --git a/rgt/GenomicRegion.py b/rgt/GenomicRegion.py
@@ -86,6 +86,21 @@ def __eq__(self, other):
         return (self.chrom, self.initial, self.final, self.orientation) == \
                (other.chrom, other.initial, other.final, other.orientation)
 
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __lt__(self, other):
+        return (self.chrom, self.initial, self.final) < (other.chrom, other.initial, other.final)
+
+    def __le__(self, other):
+        return (self.chrom, self.initial, self.final) <= (other.chrom, other.initial, other.final)
+
+    def __gt__(self, other):
+        return (self.chrom, self.initial, self.final) > (other.chrom, other.initial, other.final)
+
+    def __ge__(self, other):
+        return (self.chrom, self.initial, self.final) >= (other.chrom, other.initial, other.final)
+
     def toString(self, space=False, underline=False, strand=False):
         """Return a string of GenomicRegion by its position.
 
@@ -173,30 +188,6 @@ def __repr__(self):
         """Return official representation of GenomicRegion."""
         return ','.join([self.chrom, str(self.initial), str(self.final)])
 
-    def __cmp__(self, region):
-        """Return negative value if x < y, zero if x == y and strictly positive if x > y.
-
-        *Keyword arguments:*
-
-            - region -- Given GenomicRegion to compare.
-        """
-        if self.chrom < region.chrom:
-            return -1
-        elif self.chrom > region.chrom:
-            return 1
-        else:
-            if self.initial < region.initial:
-                return -1
-            elif self.initial > region.initial:
-                return 1
-            else:
-                if self.final < region.final:
-                    return -1
-                elif self.final > region.final:
-                    return 1
-                else:
-                    return 0
-
     def extract_blocks(self, keep_name=False):
         """Extract the block information in self.data into a GenomicRegionSet."""
         z = []

diff --git a/rgt/GenomicRegionSet.py b/rgt/GenomicRegionSet.py
@@ -378,7 +378,7 @@ def sort(self, key=None, reverse=False):
         if key:
             self.sequences.sort(key=key, reverse=reverse)
         else:
-            self.sequences.sort(cmp=GenomicRegion.__cmp__)
+            self.sequences.sort()
             self.sorted = True
 
     def get_sequences(self, genome_fasta, ex=0):
@@ -870,22 +870,22 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
         lib = cdll.LoadLibrary(Lib.get_c_rgt())
         # C-Binding of intersect overlap function
         intersect_overlap_c = lib.intersectGenomicRegionSetsOverlap
-        intersect_overlap_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
+        intersect_overlap_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
                                         POINTER(c_int), POINTER(c_int), c_int, POINTER(POINTER(c_int)),
                                         POINTER(POINTER(c_int)), POINTER(POINTER(c_int)), POINTER(c_int)]
         intersect_overlap_c.restype = None
 
         # C-Binding of intersect original function
         intersect_original_c = lib.intersectGenomicRegionSetsOriginal
-        intersect_original_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
+        intersect_original_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
                                          POINTER(c_int), POINTER(c_int), c_int, POINTER(POINTER(c_int)),
                                          POINTER(POINTER(c_int)), POINTER(POINTER(c_int)), POINTER(c_int)]
         intersect_original_c.restype = None
 
         # C-Binding of intersect completely function
         intersect_completely_included_c = lib.intersectGenomicRegionSetsCompletelyIncluded
-        intersect_completely_included_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int,
-                                                    POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int,
+        intersect_completely_included_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int,
+                                                    POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int,
                                                     POINTER(POINTER(c_int)), POINTER(POINTER(c_int)),
                                                     POINTER(POINTER(c_int)), POINTER(c_int)]
         intersect_completely_included_c.restype = None
@@ -918,10 +918,10 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
             max_len_result = len_self + len_y
 
             chromosomes_self_python = [gr.chrom for gr in a.sequences]
-            chromosomes_self_c = (c_char_p * len_self)(*chromosomes_self_python)
+            chromosomes_self_c = (c_wchar_p * len_self)(*chromosomes_self_python)
 
             chromosomes_y_python = [gr.chrom for gr in b.sequences]
-            chromosomes_y_c = (c_char_p * len_y)(*chromosomes_y_python)
+            chromosomes_y_c = (c_wchar_p * len_y)(*chromosomes_y_python)
 
             initials_self_python = [gr.initial for gr in a.sequences]
             initials_self_c = (c_int * len_self)(*initials_self_python)
@@ -954,7 +954,6 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
                                                 chromosomes_y_c, initials_y_c, finals_y_c, len_y, pointer(indices_c),
                                                 pointer(initials_result_c), pointer(finals_result_c),
                                                 byref(size_result_c))
-
             # Construct result set
             for i in range(size_result_c.value):
                 ci = indices_c[i]
@@ -976,7 +975,7 @@ def intersect_count(self, regionset, mode_count="count", threshold=False):
             - threshold -- Define the cutoff of the proportion of the intersecting region (0~50%)
 
         *Return:*
-        
+
             - A tupple of numbers: (A-B, B-A, intersection)
         """
 
@@ -1009,15 +1008,15 @@ def intersect_count(self, regionset, mode_count="count", threshold=False):
                 return len_12, len_21, len_inter
 
     def closest(self, y, max_dis=10000, return_list=False, top_N=None):
-        """Return a new GenomicRegionSet including the region(s) of y which is closest to any self region. 
+        """Return a new GenomicRegionSet including the region(s) of y which is closest to any self region.
         If there are intersection, return False.
-        
+
         *Keyword arguments:*
 
             - y -- the GenomicRegionSet which to compare with
             - max_dis -- maximum distance (default=10000 bp)
             - return_list -- return a list of the distances
-            - top_N -- return a dictionary with region names as keys and the GenomicRegionSet containing N clostest regions as values. 
+            - top_N -- return a dictionary with region names as keys and the GenomicRegionSet containing N clostest regions as values.
 
         *Return:*
 
@@ -1115,12 +1114,12 @@ def window(self, y, adding_length=1000):
         """Return the overlapping regions of self and y with adding a specified number (1000, by default) of base pairs
            upstream and downstream of each region in self. In effect, this allows regions in y that are near regions
            in self to be detected.
-        
+
         *Keyword arguments:*
 
             - y -- the GenomicRegionSet which to compare with
             - adding_length -- the length of base pairs added to upstream and downstream of self (default 1000)
-        
+
         *Return:*
 
             - A GenomicRegionSet including the regions of overlapping between extended self and original y.
@@ -1135,7 +1134,7 @@ def window(self, y, adding_length=1000):
 
     def subtract(self, y, whole_region=False, merge=True, exact=False):
         """Return a GenomicRegionSet excluded the overlapping regions with y.
-        
+
         *Keyword arguments:*
 
             - y -- the GenomicRegionSet which to subtract by
@@ -1147,7 +1146,7 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):
         *Return:*
 
             - A GenomicRegionSet which contains the remaining regions of self after subtraction
-        
+
         ::
 
             self     ----------              ------
@@ -1259,7 +1258,7 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):
             elif len(self.sequences) == 1:
                 # GRS only contains 1 region, only check if this matches exactly with any region within y
                 for target_region in y.sequences:
-                    if target_region.__cmp__(self.sequences[0]) == 0:
+                    if target_region == self.sequences[0]:
                         return GenomicRegionSet("small_self")  # return empty GRS
                 return self
             else:
@@ -1391,15 +1390,15 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):
 
     def subtract_aregion(self, y):
         """Return a GenomicRegionSet excluded the overlapping regions with y.
-        
+
         *Keyword arguments:*
 
             - y -- the GenomicRegion which to subtract by
-        
+
         *Return:*
 
             - the remaining regions of self after subtraction
-        
+
         ::
 
             self     ----------              ------
@@ -1537,21 +1536,21 @@ def combine(self, region_set, change_name=True, output=False):
 
     def cluster(self, max_distance):
         """Cluster the regions with a certain distance and return the result as a new GenomicRegionSet.
-        
+
         *Keyword arguments:*
 
             - max_distance -- the maximum distance between regions within the same cluster
-        
+
         *Return:*
 
             - A GenomicRegionSet including clusters
-        
+
         ::
 
             self           ----           ----            ----
                               ----             ----                 ----
             Result(d=1)    -------        ---------       ----      ----
-            Result(d=10)   ---------------------------------------------        
+            Result(d=10)   ---------------------------------------------
         """
 
         if not self.sorted: self.sort()
@@ -1577,15 +1576,15 @@ def cluster(self, max_distance):
 
     def flank(self, size):
         """Return two flanking intervals with given size from both ends of each region.
-        
+
         *Keyword arguments:*
 
             - size -- the length of flanking intervals (default = SAME length as the region)
-        
+
         *Return:*
 
             - z -- A GenomicRegionSet including all flanking intervals
-        
+
         ::
 
             self        -----           --            ---
@@ -1608,15 +1607,15 @@ def flank(self, size):
 
     def jaccard(self, query):
         """Return jaccard index, a value of similarity of these two GenomicRegionSet.
-        
+
         *Keyword arguments:*
 
             - query -- the GenomicRegionSet which to compare with.
-        
+
         *Return:*
 
             - similarity -- (Total length of overlapping regions)/(Total length of original regions)
-        
+
         ::
 
             self              --8--      ---10---    -4-
@@ -1659,7 +1658,7 @@ def jaccard_c(self, query):
         ctypes_jaccardC = lib.jaccard
 
         # Specify data types
-        ctypes_jaccardC.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
+        ctypes_jaccardC.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
                                     POINTER(c_int), POINTER(c_int), c_int]
         ctypes_jaccardC.restype = c_double
 
@@ -1673,11 +1672,11 @@ def jaccard_c(self, query):
 
         # Convert to ctypes
         chroms_self_python = [gr.chrom for gr in self.sequences]
-        chroms_self_c = (c_char_p * len(chroms_self_python))(*chroms_self_python)
+        chroms_self_c = (c_wchar_p * len(chroms_self_python))(*chroms_self_python)
         # print('Converted self.chroms to c', str(chroms_self_python[:4]), '...')
 
         chroms_query_python = [gr.chrom for gr in query.sequences]
-        chroms_query_c = (c_char_p * len(chroms_query_python))(*chroms_query_python)
+        chroms_query_c = (c_wchar_p * len(chroms_query_python))(*chroms_query_python)
         # print('Converted query.chroms to c', str(chroms_query_python[:4]), '...')
 
         initials_self_python = [gr.initial for gr in self.sequences]

diff --git a/rgt/GenomicVariantSet.py b/rgt/GenomicVariantSet.py
@@ -30,7 +30,7 @@ def sort(self):
         .. note:: By default, the genomic position is used as sorting criteria.
         
         """
-        self.sequences.sort(cmp=GenomicVariant.__cmp__)
+        self.sequences.sort()
         self.sorted = True
 
     def read_vcf(self, vcf_path):

diff --git a/rgt/tdf/BindingSiteSet.py b/rgt/tdf/BindingSiteSet.py
@@ -86,7 +86,7 @@ def __init__(self, name):
 
     def sort(self):
         """Sort Elements by criteria defined by a GenomicRegion."""
-        self.sequences.sort(cmp = GenomicRegion.__cmp__)
+        self.sequences.sort()
         self.sorted = True
 
     def get_bs(self, orientation):

diff --git a/rgt/tdf/RNADNABindingSet.py b/rgt/tdf/RNADNABindingSet.py
@@ -193,12 +193,12 @@ def get_dbs(self, sort=False, orientation=None, rm_duplicate=False, dbd_tag=Fals
 
     def sort_rbs(self):
         """Sort the dictionary by RNA"""
-        self.sequences = sorted(self.sequences, key=lambda x: x.rna, cmp=GenomicRegion.__cmp__)
+        self.sequences = sorted(self.sequences, key=lambda x: x.rna)
         self.sorted_rna = True
 
     def sort_dbs(self):
         """Sort the dictionary by DNA"""
-        self.sequences = sorted(self.sequences, key=lambda x: x.dna, cmp=GenomicRegion.__cmp__)
+        self.sequences = sorted(self.sequences, key=lambda x: x.dna)
         self.sorted_dna = True
 
     def sort_dbs_by_regions(self, regionset):

diff --git a/unittest/motifanalysis/test_Enrichment.py b/unittest/motifanalysis/test_Enrichment.py
@@ -1,5 +1,3 @@
-# Python 3 compatibility
-
 
 # Python
 import unittest
@@ -26,4 +24,4 @@ def test_subtract_exact(self):
         self.assertEqual(len(background_tmp.sequences), len(reference.sequences))
 
         for region, region_ref in zip(background_tmp.sequences, reference.sequences):
-            self.assertEqual(region.__cmp__(region_ref), 0)
+            self.assertEqual(region, region_ref)
diff --git a/unittest/motifanalysis/test_Statistics.py b/unittest/motifanalysis/test_Statistics.py
@@ -1,5 +1,3 @@
-# Python 3 compatibility
-
 
 # Python
 import unittest

diff --git a/unittest/test_GenomicRegionSet.py b/unittest/test_GenomicRegionSet.py
@@ -342,6 +342,10 @@ def test_intersect(self):
         self.region_sets([['chr1', 1, 10]],
                          [['chr2', 1, 10]])
         result = self.setA.intersect(self.setB)
+        print(self.setA.sequences)
+        print(self.setB.sequences)
+        print(result.sequences)
+        print(self.setA.sequences[0] < self.setB.sequences[0])
         self.assertEqual(len(result), 0)
 
         result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)