Skip to content

Commit

Permalink
update GCoverageSet
Browse files Browse the repository at this point in the history
  • Loading branch information
chaochungkuo committed Mar 4, 2024
1 parent da1a92d commit af3c84a
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 34 deletions.
29 changes: 19 additions & 10 deletions genomkit/coverages/gcoverages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pysam
from tqdm import tqdm
from genomkit import GRegions, GRegion
import os


class GCoverages:
Expand All @@ -24,13 +25,12 @@ def __init__(self, bin_size: int = 1, load: str = "", windows=None):
self.coverage = {}
self.bin_size = bin_size
if load.lower().endswith(".bw") or load.lower().endswith(".bigwig"):
self.load_coverage_from_bigwig(load)
self.load_coverage_from_bigwig(load, windows=windows)
elif load.lower().endswith(".bam"):
self.calculate_coverage_from_bam(load)
self.calculate_coverage_from_bam(load, windows=windows)
elif load.lower().endswith(".bed"):
from genomkit import GRegions
regions = GRegions(name=load, load=load)
self.calculate_coverage_GRegions(regions=regions,
self.calculate_coverage_GRegions(windows=windows,
scores=regions)

def load_coverage_from_bigwig(self, filename: str, windows=None):
Expand All @@ -50,23 +50,32 @@ def load_coverage_from_bigwig(self, filename: str, windows=None):
chrom_regions.add(GRegion(sequence=chrom,
start=0,
end=int(chrom_length)))
for r in chrom_regions:
for r in tqdm(chrom_regions,
desc=os.path.basename(filename),
total=len(chrom_regions)):
coverage = bw.values(r.sequence, r.start, r.end, numpy=True)
if self.bin_size > 1:
coverage = [np.mean(coverage[i:i+self.bin_size])
if i+self.bin_size <= len(coverage)
else np.mean(coverage[i:])
for i in range(0, len(coverage),
self.bin_size)]
self.coverage[r] = coverage
else:
# Get only the coverage on the defined regions
assert isinstance(windows, GRegions)
for window in windows:
self.coverage = {}
for window in tqdm(windows,
desc=os.path.basename(filename),
total=len(windows)):
coverage = bw.values(window.sequence,
window.start,
window.end,
numpy=True)
if self.bin_size > 1:
coverage = [np.mean(coverage[i:i+self.bin_size])
if i+self.bin_size <= len(coverage)
else np.mean(coverage[i:])
for i in range(0, len(coverage),
self.bin_size)]
self.coverage[window] = coverage
Expand All @@ -91,10 +100,10 @@ def calculate_coverage_from_bam(self, filename: str, windows=None):
self.coverage[r] = [0] * chrom_len
self.coverage[r][pileupcolumn.reference_pos] += 1
if windows:
for r in windows:
self.coverage[r] = [0] * len(r)
for pileupcolumn in bam.pileup(r.sequence, r.start, r.end):
self.coverage[r][pileupcolumn.reference_pos-r.start] += 1
for w in windows:
self.coverage[w] = [0] * len(w)
for pileupcolumn in bam.pileup(w.sequence, w.start, w.end):
self.coverage[w][pileupcolumn.reference_pos-w.start] += 1
bam.close()
# Adjust for bin size
for r in self.coverage.keys():
Expand Down
9 changes: 4 additions & 5 deletions genomkit/coverages/gcoverages_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class GCoveragesSet:
analyzing the interactions of many genomic coverages.
"""

def __init__(self, name: str = "", load_dict=None):
def __init__(self, name: str = "", load_dict=None, windows=None):
"""Initiate a GCoveragesSet object which can contain multiple
GCoverages.
Expand All @@ -27,8 +27,7 @@ def __init__(self, name: str = "", load_dict=None):
if load_dict:
for name, filename in load_dict.items():
self.add(name=name,
gcov=GCoverages(name=name,
load=filename))
gcov=GCoverages(windows=windows, load=filename))

def add(self, name, gcov):
"""Add a GCoverages into GCoveragesSet.
Expand Down Expand Up @@ -57,8 +56,8 @@ def __getattr__(self, key):
f" object has no attribute '{key}'"
)

def __setattr__(self, key, value):
self.collection[key] = value
# def __setattr__(self, key, value):
# self.collection[key] = value

def get_names(self):
"""Return the names of all GCoverages.
Expand Down
12 changes: 6 additions & 6 deletions genomkit/regions/gregion.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,25 +238,25 @@ def resize(self, extend_upstream: int, extend_downstream: int,
if center == "mid_point":
center = int(0.5*(self.end-self.start))
if self.orientation == "-":
start = center+extend_upstream
end = center-extend_downstream
start = center-extend_downstream
end = center+extend_upstream
else:
start = center-extend_upstream
end = center+extend_downstream
elif center == "5prime":
if self.orientation == "-":
center = self.end
start = center+extend_upstream
end = center-extend_downstream
start = center-extend_downstream
end = center+extend_upstream
else:
center = self.start
start = center-extend_upstream
end = center+extend_downstream
elif center == "3prime":
if self.orientation == "-":
center = self.start
start = center+extend_upstream
end = center-extend_downstream
start = center-extend_downstream
end = center+extend_upstream
else:
center = self.end
start = center-extend_upstream
Expand Down
16 changes: 8 additions & 8 deletions genomkit/regions/gregions_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ def __len__(self):
"""
return len(self.collection)

def __getattr__(self, key):
if key in self.collection:
return self.collection[key]
else:
raise AttributeError(
f"'{self.collection.__class__.__name__}'"
f" object has no attribute '{key}'"
)
# def __getattr__(self, key):
# if key in self.collection:
# return self.collection[key]
# else:
# raise AttributeError(
# f"'{self.collection.__class__.__name__}'"
# f" object has no attribute '{key}'"
# )

def __setattr__(self, key, value):
self.collection[key] = value
Expand Down
Binary file added tests/test_files/bam/Col0_C1.100k.bam.bai
Binary file not shown.
41 changes: 36 additions & 5 deletions tests/test_gcoverages.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,33 @@
class TestGCoverages(unittest.TestCase):

def test_load_coverage_from_bigwig(self):
cov = GCoverages()
cov.load_coverage_from_bigwig(
filename=os.path.join(script_path,
"test_files/bigwig/test.bw")
)
cov = GCoverages(bin_size=1,
load=os.path.join(script_path,
"test_files/bigwig/test.bw"),
windows=None)
# cov.load_coverage_from_bigwig(filename="tests/test_files/bigwig/test.bw")
# {'1': 195471971, '10': 130694993}
self.assertEqual(len(cov.coverage), 2)
self.assertEqual(list(cov.coverage.keys())[0].sequence, "1")
self.assertEqual(list(cov.coverage.keys())[1].sequence, "10")
windows = GRegions(name="windows")
windows.add(GRegion(sequence="1", start=0, end=1000))
cov = GCoverages(bin_size=1,
load=os.path.join(script_path,
"test_files/bigwig/test.bw"),
windows=windows)
self.assertEqual(len(cov.coverage), 1)
self.assertEqual(list(cov.coverage.keys())[0].sequence, "1")
self.assertEqual(list(cov.coverage.keys())[0].end, 1000)
cov = GCoverages(bin_size=2,
load=os.path.join(script_path,
"test_files/bigwig/test.bw"),
windows=windows)
# cov.load_coverage_from_bigwig(filename="tests/test_files/bigwig/test.bw")
# {'1': 195471971, '10': 130694993}
self.assertEqual(len(cov.coverage), 1)
self.assertEqual(list(cov.coverage.keys())[0].sequence, "1")
self.assertEqual(list(cov.coverage.keys())[0].end, 1000)
cov = GCoverages()
cov.load_coverage_from_bigwig(
filename=os.path.join(script_path,
Expand All @@ -35,6 +52,20 @@ def test_calculate_coverage_from_bam(self):
# cov.calculate_coverage_from_bam(filename="tests/test_files/bam/Col0_C1.100k.bam")
# ['1']
self.assertEqual(len(cov.coverage.keys()), 1)
windows = GRegions(name="windows")
windows.add(GRegion(sequence="1", start=0, end=1000))
cov = GCoverages(bin_size=1,
load=os.path.join(script_path,
"test_files/bam/Col0_C1.100k.bam"),
windows=windows)
self.assertEqual(len(cov.coverage.keys()), 1)
self.assertEqual(len(cov.coverage[windows[0]]), 1000)
cov = GCoverages(bin_size=2,
load=os.path.join(script_path,
"test_files/bam/Col0_C1.100k.bam"),
windows=windows)
self.assertEqual(len(cov.coverage.keys()), 1)
self.assertEqual(len(cov.coverage[windows[0]]), 500)

def test_calculate_coverage_GRegions(self):
regions = GRegions(name="test",
Expand Down

0 comments on commit af3c84a

Please sign in to comment.