-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5291957
commit 81066e9
Showing
6 changed files
with
107 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Genomic Variants | ||
======================= | ||
|
||
.. automodule:: genomkit.variants | ||
:members: | ||
|
||
.. automodule:: genomkit.variants.gvariant | ||
:members: | ||
|
||
.. automodule:: genomkit.variants.gvariants | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,4 +28,5 @@ Contents | |
genomic_regions | ||
genomic_annotation | ||
genomic_coverage | ||
genomic_variants | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
""" | ||
Genomic Variants Modules | ||
These modules contain functions and classes for working with genomic variants. | ||
They provide utilities for handling and analyzing genomic variants. | ||
- **GVariants** is a collection of many genomic variants. | ||
- **GVariantsSet** is a set of many GVariants which represent different genomic variants. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
class GVariant:
    """A single genomic variant.

    The attributes follow the column order of a VCF record (chromosome,
    position, ID, reference allele, alternative allele, quality, filter
    status) followed by a list holding any remaining values.

    :param chrom: Name of the chromosome / sequence
    :type chrom: str
    :param pos: Position of the variant
    :type pos: int
    :param vid: Identifier of the variant
    :type vid: str
    :param ref: Reference allele
    :type ref: str
    :param alt: Alternative allele
    :type alt: str
    :param qual: Quality value of the variant
    :type qual: int
    :param filter_status: Filter status of the variant
    :type filter_status: str
    :param info: Additional values attached to the variant, defaults to a
                 new empty list
    :type info: list, optional
    """
    # The attribute must be spelled ``__slots__`` for Python to honour it;
    # the misspelled ``__slot__`` was just an unused class attribute.
    __slots__ = ("chrom", "pos", "vid", "ref", "alt", "qual", "filter_status",
                 "info")

    def __init__(self, chrom: str, pos: int, vid: str, ref: str, alt: str,
                 qual: int, filter_status: str, info: "list | None" = None):
        self.chrom = chrom
        self.pos = pos
        self.vid = vid
        self.ref = ref
        self.alt = alt
        self.qual = qual
        self.filter_status = filter_status
        # A ``[]`` default would be shared by every instance created without
        # ``info``; build a fresh list per instance instead.
        self.info = [] if info is None else info

    def to_GRegion(self):
        """Convert the variant to a GRegion.

        The region uses the variant position as both start and end, the
        variant ID as its name, and carries ref, alt, qual, filter status and
        the info values as its data list.

        :return: A GRegion built from this variant
        :rtype: GRegion
        """
        # Imported lazily inside the method, as in the original code.
        from genomkit import GRegion
        data = [self.ref, self.alt, self.qual, self.filter_status] + self.info
        return GRegion(sequence=self.chrom, start=self.pos, end=self.pos,
                       name=self.vid, data=data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from genomkit import GVariant | ||
|
||
|
||
########################################################################### | ||
# GVariants | ||
########################################################################### | ||
class GVariants:
    """
    GVariants module

    A collection of genomic variants. It stores GVariant objects in a list,
    can populate itself from a VCF file and can be converted to a GRegions
    object.
    """
    def __init__(self, name: str = "", load: str = ""):
        """Initiate GVariants object.

        :param name: Define the name, defaults to ""
        :type name: str, optional
        :param load: Define the filename for loading a VCF file, defaults to ""
        :type load: str, optional
        """
        self.name = name
        self.variants = []
        # An empty filename means "start with an empty collection".
        if load:
            self.load(filename=load)

    def load(self, filename):
        """Load variants from a VCF file.

        Header lines (starting with ``#``) and blank lines are skipped. Every
        other line is split on tabs; the first seven columns become the fixed
        fields of a GVariant and all remaining columns are stored as its
        ``info`` list. Loaded variants are appended to ``self.variants``.

        :param filename: Define the filename for loading a VCF file
        :type filename: str
        """
        with open(filename, 'r') as vcf_file:
            for line in vcf_file:
                if line.startswith('#'):
                    continue  # Skip header lines
                record = line.strip()
                if not record:
                    # Blank lines (e.g. a trailing empty line) carry no
                    # record and would otherwise raise IndexError below.
                    continue
                fields = record.split('\t')
                variant = GVariant(chrom=fields[0],
                                   pos=int(fields[1]),
                                   vid=fields[2],
                                   ref=fields[3],
                                   alt=fields[4],
                                   qual=fields[5],  # kept as the raw string
                                   filter_status=fields[6],
                                   info=fields[7:])
                self.variants.append(variant)

    def to_GRegions(self):
        """Convert the GVariants to a GRegions object.

        Each stored variant is converted with its ``to_GRegion`` method and
        added to a GRegions named after this collection.

        :return: A GRegions object
        :rtype: GRegions
        """
        # Imported lazily inside the method, as in the original code.
        from genomkit import GRegions
        regions = GRegions(name=self.name)
        for variant in self.variants:
            regions.add(variant.to_GRegion())
        return regions