Skip to content

Commit

Permalink
more python fixes and conversions
Browse files Browse the repository at this point in the history
  • Loading branch information
fabio-t committed Oct 29, 2019
1 parent cf879dd commit 220d50c
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 82 deletions.
18 changes: 7 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ matrix:
- os: linux
dist: xenial
language: python
python: "2.7"
- os: linux
dist: xenial
language: python
python: "3.7"
python: "3.6"
- os: osx
language: generic
addons:
Expand All @@ -19,14 +15,14 @@ matrix:
- llvm
- boost
allow_failures:
- python: "3.7"
- python: "3.6"
before_install:
- pip install --upgrade pip
- pip3 install --upgrade pip
install:
- pip install cython
- pip install numpy scipy
- pip install coveralls
- pip install .
- pip3 install cython
- pip3 install numpy scipy
- pip3 install coveralls
- pip3 install .
# command to run tests
script:
- coverage run --source ./ -m unittest discover unittest/ -p "*" -v
Expand Down
39 changes: 15 additions & 24 deletions rgt/GenomicRegion.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,21 @@ def __eq__(self, other):
return (self.chrom, self.initial, self.final, self.orientation) == \
(other.chrom, other.initial, other.final, other.orientation)

def __ne__(self, other):
    """Return True if this region is not equal to ``other``.

    The answer is the boolean negation of ``self == other``, so it always agrees with the
    equality comparison defined for the class.

    *Keyword arguments:*

        - other -- Object to compare with.
    """
    is_equal = self == other
    return not is_equal

def __lt__(self, other):
    """Return True if this region sorts strictly before ``other``.

    Regions are ordered by the tuple (chrom, initial, final); orientation is not part of the
    ordering. Chromosome names are compared as plain values, so string names order
    lexicographically (e.g. "chr10" sorts before "chr2").

    *Keyword arguments:*

        - other -- Object to compare with; it must expose chrom, initial and final.

    *Return:*

        - A bool, or NotImplemented when ``other`` lacks those attributes, so that Python can
          try the reflected comparison and otherwise raise TypeError instead of AttributeError.
    """
    try:
        other_key = (other.chrom, other.initial, other.final)
    except AttributeError:
        # Not region-like: defer to Python's comparison fallback.
        return NotImplemented
    return (self.chrom, self.initial, self.final) < other_key

def __le__(self, other):
    """Return True if this region sorts before ``other`` or at the same position.

    Regions are ordered by the tuple (chrom, initial, final). Orientation is ignored here
    although ``__eq__`` takes it into account, so ``a <= b and b <= a`` does not imply
    ``a == b`` for regions that differ only in orientation.

    *Keyword arguments:*

        - other -- Object to compare with; it must expose chrom, initial and final.

    *Return:*

        - A bool, or NotImplemented when ``other`` lacks those attributes, so that Python can
          try the reflected comparison and otherwise raise TypeError instead of AttributeError.
    """
    try:
        other_key = (other.chrom, other.initial, other.final)
    except AttributeError:
        # Not region-like: defer to Python's comparison fallback.
        return NotImplemented
    return (self.chrom, self.initial, self.final) <= other_key

def __gt__(self, other):
    """Return True if this region sorts strictly after ``other``.

    Regions are ordered by the tuple (chrom, initial, final); orientation is not part of the
    ordering.

    *Keyword arguments:*

        - other -- Object to compare with; it must expose chrom, initial and final.

    *Return:*

        - A bool, or NotImplemented when ``other`` lacks those attributes, so that Python can
          try the reflected comparison and otherwise raise TypeError instead of AttributeError.
    """
    try:
        other_key = (other.chrom, other.initial, other.final)
    except AttributeError:
        # Not region-like: defer to Python's comparison fallback.
        return NotImplemented
    return (self.chrom, self.initial, self.final) > other_key

def __ge__(self, other):
    """Return True if this region sorts after ``other`` or at the same position.

    Regions are ordered by the tuple (chrom, initial, final). Orientation is ignored here
    although ``__eq__`` takes it into account, so ``a >= b and b >= a`` does not imply
    ``a == b`` for regions that differ only in orientation.

    *Keyword arguments:*

        - other -- Object to compare with; it must expose chrom, initial and final.

    *Return:*

        - A bool, or NotImplemented when ``other`` lacks those attributes, so that Python can
          try the reflected comparison and otherwise raise TypeError instead of AttributeError.
    """
    try:
        other_key = (other.chrom, other.initial, other.final)
    except AttributeError:
        # Not region-like: defer to Python's comparison fallback.
        return NotImplemented
    return (self.chrom, self.initial, self.final) >= other_key

def toString(self, space=False, underline=False, strand=False):
"""Return a string of GenomicRegion by its position.
Expand Down Expand Up @@ -173,30 +188,6 @@ def __repr__(self):
"""Return official representation of GenomicRegion."""
return ','.join([self.chrom, str(self.initial), str(self.final)])

def __cmp__(self, region):
    """Three-way comparison of two regions (legacy Python 2 protocol).

    The fields chrom, initial and final are compared in that order; the first field that
    differs decides the result. Orientation is not considered.

    *Keyword arguments:*

        - region -- Given GenomicRegion to compare.

    *Return:*

        - -1 if self sorts before region, 1 if it sorts after, 0 if all three fields match.
    """
    for field in ("chrom", "initial", "final"):
        # Fields are read lazily, one at a time, exactly as the comparison proceeds.
        if getattr(self, field) < getattr(region, field):
            return -1
        if getattr(self, field) > getattr(region, field):
            return 1
    return 0

def extract_blocks(self, keep_name=False):
"""Extract the block information in self.data into a GenomicRegionSet."""
z = []
Expand Down
65 changes: 32 additions & 33 deletions rgt/GenomicRegionSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def sort(self, key=None, reverse=False):
if key:
self.sequences.sort(key=key, reverse=reverse)
else:
self.sequences.sort(cmp=GenomicRegion.__cmp__)
self.sequences.sort()
self.sorted = True

def get_sequences(self, genome_fasta, ex=0):
Expand Down Expand Up @@ -870,22 +870,22 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
lib = cdll.LoadLibrary(Lib.get_c_rgt())
# C-Binding of intersect overlap function
intersect_overlap_c = lib.intersectGenomicRegionSetsOverlap
intersect_overlap_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
intersect_overlap_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
POINTER(c_int), POINTER(c_int), c_int, POINTER(POINTER(c_int)),
POINTER(POINTER(c_int)), POINTER(POINTER(c_int)), POINTER(c_int)]
intersect_overlap_c.restype = None

# C-Binding of intersect original function
intersect_original_c = lib.intersectGenomicRegionSetsOriginal
intersect_original_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
intersect_original_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
POINTER(c_int), POINTER(c_int), c_int, POINTER(POINTER(c_int)),
POINTER(POINTER(c_int)), POINTER(POINTER(c_int)), POINTER(c_int)]
intersect_original_c.restype = None

# C-Binding of intersect completely function
intersect_completely_included_c = lib.intersectGenomicRegionSetsCompletelyIncluded
intersect_completely_included_c.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int,
POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int,
intersect_completely_included_c.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int,
POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int,
POINTER(POINTER(c_int)), POINTER(POINTER(c_int)),
POINTER(POINTER(c_int)), POINTER(c_int)]
intersect_completely_included_c.restype = None
Expand Down Expand Up @@ -918,10 +918,10 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
max_len_result = len_self + len_y

chromosomes_self_python = [gr.chrom for gr in a.sequences]
chromosomes_self_c = (c_char_p * len_self)(*chromosomes_self_python)
chromosomes_self_c = (c_wchar_p * len_self)(*chromosomes_self_python)

chromosomes_y_python = [gr.chrom for gr in b.sequences]
chromosomes_y_c = (c_char_p * len_y)(*chromosomes_y_python)
chromosomes_y_c = (c_wchar_p * len_y)(*chromosomes_y_python)

initials_self_python = [gr.initial for gr in a.sequences]
initials_self_c = (c_int * len_self)(*initials_self_python)
Expand Down Expand Up @@ -954,7 +954,6 @@ def intersect_c(self, y, mode=OverlapType.OVERLAP, rm_duplicates=False):
chromosomes_y_c, initials_y_c, finals_y_c, len_y, pointer(indices_c),
pointer(initials_result_c), pointer(finals_result_c),
byref(size_result_c))

# Construct result set
for i in range(size_result_c.value):
ci = indices_c[i]
Expand All @@ -976,7 +975,7 @@ def intersect_count(self, regionset, mode_count="count", threshold=False):
- threshold -- Define the cutoff of the proportion of the intersecting region (0~50%)
*Return:*
- A tuple of numbers: (A-B, B-A, intersection)
"""

Expand Down Expand Up @@ -1009,15 +1008,15 @@ def intersect_count(self, regionset, mode_count="count", threshold=False):
return len_12, len_21, len_inter

def closest(self, y, max_dis=10000, return_list=False, top_N=None):
"""Return a new GenomicRegionSet including the region(s) of y which is closest to any self region.
"""Return a new GenomicRegionSet including the region(s) of y which is closest to any self region.
If there is an intersection, return False.
*Keyword arguments:*
- y -- the GenomicRegionSet which to compare with
- max_dis -- maximum distance (default=10000 bp)
- return_list -- return a list of the distances
- top_N -- return a dictionary with region names as keys and the GenomicRegionSet containing N closest regions as values.
- top_N -- return a dictionary with region names as keys and the GenomicRegionSet containing N closest regions as values.
*Return:*
Expand Down Expand Up @@ -1115,12 +1114,12 @@ def window(self, y, adding_length=1000):
"""Return the overlapping regions of self and y with adding a specified number (1000, by default) of base pairs
upstream and downstream of each region in self. In effect, this allows regions in y that are near regions
in self to be detected.
*Keyword arguments:*
- y -- the GenomicRegionSet which to compare with
- adding_length -- the length of base pairs added to upstream and downstream of self (default 1000)
*Return:*
- A GenomicRegionSet including the regions of overlapping between extended self and original y.
Expand All @@ -1135,7 +1134,7 @@ def window(self, y, adding_length=1000):

def subtract(self, y, whole_region=False, merge=True, exact=False):
"""Return a GenomicRegionSet excluded the overlapping regions with y.
*Keyword arguments:*
- y -- the GenomicRegionSet which to subtract by
Expand All @@ -1147,7 +1146,7 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):
*Return:*
- A GenomicRegionSet which contains the remaining regions of self after subtraction
::
self ---------- ------
Expand Down Expand Up @@ -1259,7 +1258,7 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):
elif len(self.sequences) == 1:
# GRS only contains 1 region, only check if this matches exactly with any region within y
for target_region in y.sequences:
if target_region.__cmp__(self.sequences[0]) == 0:
if target_region == self.sequences[0]:
return GenomicRegionSet("small_self") # return empty GRS
return self
else:
Expand Down Expand Up @@ -1391,15 +1390,15 @@ def subtract(self, y, whole_region=False, merge=True, exact=False):

def subtract_aregion(self, y):
"""Return a GenomicRegionSet excluded the overlapping regions with y.
*Keyword arguments:*
- y -- the GenomicRegion which to subtract by
*Return:*
- the remaining regions of self after subtraction
::
self ---------- ------
Expand Down Expand Up @@ -1537,21 +1536,21 @@ def combine(self, region_set, change_name=True, output=False):

def cluster(self, max_distance):
"""Cluster the regions with a certain distance and return the result as a new GenomicRegionSet.
*Keyword arguments:*
- max_distance -- the maximum distance between regions within the same cluster
*Return:*
- A GenomicRegionSet including clusters
::
self ---- ---- ----
---- ---- ----
Result(d=1) ------- --------- ---- ----
Result(d=10) ---------------------------------------------
Result(d=10) ---------------------------------------------
"""

if not self.sorted: self.sort()
Expand All @@ -1577,15 +1576,15 @@ def cluster(self, max_distance):

def flank(self, size):
"""Return two flanking intervals with given size from both ends of each region.
*Keyword arguments:*
- size -- the length of flanking intervals (default = SAME length as the region)
*Return:*
- z -- A GenomicRegionSet including all flanking intervals
::
self ----- -- ---
Expand All @@ -1608,15 +1607,15 @@ def flank(self, size):

def jaccard(self, query):
"""Return jaccard index, a value of similarity of these two GenomicRegionSet.
*Keyword arguments:*
- query -- the GenomicRegionSet which to compare with.
*Return:*
- similarity -- (Total length of overlapping regions)/(Total length of original regions)
::
self --8-- ---10--- -4-
Expand Down Expand Up @@ -1659,7 +1658,7 @@ def jaccard_c(self, query):
ctypes_jaccardC = lib.jaccard

# Specify data types
ctypes_jaccardC.argtypes = [POINTER(c_char_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_char_p),
ctypes_jaccardC.argtypes = [POINTER(c_wchar_p), POINTER(c_int), POINTER(c_int), c_int, POINTER(c_wchar_p),
POINTER(c_int), POINTER(c_int), c_int]
ctypes_jaccardC.restype = c_double

Expand All @@ -1673,11 +1672,11 @@ def jaccard_c(self, query):

# Convert to ctypes
chroms_self_python = [gr.chrom for gr in self.sequences]
chroms_self_c = (c_char_p * len(chroms_self_python))(*chroms_self_python)
chroms_self_c = (c_wchar_p * len(chroms_self_python))(*chroms_self_python)
# print('Converted self.chroms to c', str(chroms_self_python[:4]), '...')

chroms_query_python = [gr.chrom for gr in query.sequences]
chroms_query_c = (c_char_p * len(chroms_query_python))(*chroms_query_python)
chroms_query_c = (c_wchar_p * len(chroms_query_python))(*chroms_query_python)
# print('Converted query.chroms to c', str(chroms_query_python[:4]), '...')

initials_self_python = [gr.initial for gr in self.sequences]
Expand Down
2 changes: 1 addition & 1 deletion rgt/GenomicVariantSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def sort(self):
.. note:: By default, the genomic position is used as sorting criteria.
"""
self.sequences.sort(cmp=GenomicVariant.__cmp__)
self.sequences.sort()
self.sorted = True

def read_vcf(self, vcf_path):
Expand Down
2 changes: 1 addition & 1 deletion rgt/tdf/BindingSiteSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(self, name):

def sort(self):
"""Sort Elements by criteria defined by a GenomicRegion."""
self.sequences.sort(cmp = GenomicRegion.__cmp__)
self.sequences.sort()
self.sorted = True

def get_bs(self, orientation):
Expand Down
4 changes: 2 additions & 2 deletions rgt/tdf/RNADNABindingSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,12 @@ def get_dbs(self, sort=False, orientation=None, rm_duplicate=False, dbd_tag=Fals

def sort_rbs(self):
"""Sort the dictionary by RNA"""
self.sequences = sorted(self.sequences, key=lambda x: x.rna, cmp=GenomicRegion.__cmp__)
self.sequences = sorted(self.sequences, key=lambda x: x.rna)
self.sorted_rna = True

def sort_dbs(self):
"""Sort the dictionary by DNA"""
self.sequences = sorted(self.sequences, key=lambda x: x.dna, cmp=GenomicRegion.__cmp__)
self.sequences = sorted(self.sequences, key=lambda x: x.dna)
self.sorted_dna = True

def sort_dbs_by_regions(self, regionset):
Expand Down
4 changes: 1 addition & 3 deletions unittest/motifanalysis/test_Enrichment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Python 3 compatibility


# Python
import unittest
Expand All @@ -26,4 +24,4 @@ def test_subtract_exact(self):
self.assertEqual(len(background_tmp.sequences), len(reference.sequences))

for region, region_ref in zip(background_tmp.sequences, reference.sequences):
self.assertEqual(region.__cmp__(region_ref), 0)
self.assertEqual(region, region_ref)
2 changes: 0 additions & 2 deletions unittest/motifanalysis/test_Statistics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Python 3 compatibility


# Python
import unittest
Expand Down
4 changes: 4 additions & 0 deletions unittest/test_GenomicRegionSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def test_intersect(self):
self.region_sets([['chr1', 1, 10]],
[['chr2', 1, 10]])
result = self.setA.intersect(self.setB)
print(self.setA.sequences)
print(self.setB.sequences)
print(result.sequences)
print(self.setA.sequences[0] < self.setB.sequences[0])
self.assertEqual(len(result), 0)

result = self.setA.intersect(self.setB, mode=OverlapType.ORIGINAL)
Expand Down
Loading

0 comments on commit 220d50c

Please sign in to comment.