Add genomkit.variants (GVariant, GVariants) and its documentation page.

Review notes: line breaks and indentation were restored from a
whitespace-collapsed copy of this patch, so the indentation of the lines in
the two .rst files is assumed (3 spaces) and should be checked against the
tree. The blob ids of the three rewritten Python files were dropped because
their contents changed.

diff --git a/docs/source/genomic_variants.rst b/docs/source/genomic_variants.rst
new file mode 100644
index 0000000..47fe370
--- /dev/null
+++ b/docs/source/genomic_variants.rst
@@ -0,0 +1,11 @@
+Genomic Variants
+=======================
+
+.. automodule:: genomkit.variants
+   :members:
+
+.. automodule:: genomkit.variants.gvariant
+   :members:
+
+.. automodule:: genomkit.variants.gvariants
+   :members:
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a733098..40bb35c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -28,4 +28,5 @@ Contents
    genomic_regions
    genomic_annotation
    genomic_coverage
+   genomic_variants
 
diff --git a/genomkit/__init__.py b/genomkit/__init__.py
index c13d431..3b69a7a 100644
--- a/genomkit/__init__.py
+++ b/genomkit/__init__.py
@@ -5,5 +5,7 @@
 from .annotation.gannotation import GAnnotation
 from .alignments.galignments import GAlignments
 from .coverages.gcoverages import GCoverage
+from .variants.gvariant import GVariant
+from .variants.gvariants import GVariants
 
 __version__ = "0.2.4"
diff --git a/genomkit/variants/__init__.py b/genomkit/variants/__init__.py
new file mode 100644
--- /dev/null
+++ b/genomkit/variants/__init__.py
@@ -0,0 +1,9 @@
+"""
+Genomic Variants Modules
+
+These modules contain functions and classes for working with genomic variants.
+They provide utilities for handling and analyzing genomic variants.
+
+- **GVariant** is a single genomic variant (one record of a VCF file).
+- **GVariants** is a collection of many genomic variants.
+"""
\ No newline at end of file
diff --git a/genomkit/variants/gvariant.py b/genomkit/variants/gvariant.py
new file mode 100644
--- /dev/null
+++ b/genomkit/variants/gvariant.py
@@ -0,0 +1,59 @@
+class GVariant:
+    """A single genomic variant, holding the fixed columns of one VCF record.
+
+    :param chrom: Chromosome / sequence name (VCF ``CHROM``)
+    :type chrom: str
+    :param pos: Position of the variant (VCF ``POS``)
+    :type pos: int
+    :param vid: Variant identifier (VCF ``ID``)
+    :type vid: str
+    :param ref: Reference allele (VCF ``REF``)
+    :type ref: str
+    :param alt: Alternative allele(s) (VCF ``ALT``)
+    :type alt: str
+    :param qual: Quality value (VCF ``QUAL``); ``GVariants.load`` passes the
+                 raw text of the column
+    :type qual: int or str
+    :param filter_status: Filter status (VCF ``FILTER``)
+    :type filter_status: str
+    :param info: Remaining columns (``INFO`` and anything after it); a single
+                 string is wrapped into a one-element list, defaults to an
+                 empty list
+    :type info: list or str, optional
+    """
+    __slots__ = ["chrom", "pos", "vid", "ref", "alt", "qual", "filter_status",
+                 "info"]
+
+    def __init__(self, chrom: str, pos: int, vid: str, ref: str, alt: str,
+                 qual, filter_status: str, info=None):
+        self.chrom = chrom
+        self.pos = pos
+        self.vid = vid
+        self.ref = ref
+        self.alt = alt
+        self.qual = qual
+        self.filter_status = filter_status
+        # Never share one default list between instances, and always store a
+        # list so that to_GRegion() can concatenate it.
+        if info is None:
+            info = []
+        elif isinstance(info, str):
+            info = [info]
+        self.info = list(info)
+
+    def to_GRegion(self):
+        """Convert the variant to a GRegion object.
+
+        :return: A GRegion named after the variant ID whose data holds ref,
+                 alt, qual, filter_status and the info fields
+        :rtype: GRegion
+        """
+        # Local import; presumably avoids a circular import with the
+        # genomkit package -- TODO confirm.
+        from genomkit import GRegion
+        # NOTE(review): start and end are both set to the VCF position as-is;
+        # confirm this matches the coordinate convention GRegion expects.
+        region = GRegion(sequence=self.chrom, start=self.pos, end=self.pos,
+                         name=self.vid, data=[self.ref, self.alt, self.qual,
+                                              self.filter_status] + self.info)
+        return region
diff --git a/genomkit/variants/gvariants.py b/genomkit/variants/gvariants.py
new file mode 100644
--- /dev/null
+++ b/genomkit/variants/gvariants.py
@@ -0,0 +1,71 @@
+import gzip
+
+from .gvariant import GVariant
+
+
+###########################################################################
+# GVariants
+###########################################################################
+class GVariants:
+    """
+    GVariants module
+
+    This module contains functions and classes for working with a collection of
+    genomic variants. It provides utilities for handling and analyzing the
+    interactions of many genomic variants.
+    """
+    def __init__(self, name: str = "", load: str = ""):
+        """Initiate GVariants object.
+
+        :param name: Define the name, defaults to ""
+        :type name: str, optional
+        :param load: Define the filename for loading a VCF file, defaults to ""
+        :type load: str, optional
+        """
+        self.name = name
+        self.variants = []
+        if load:
+            self.load(filename=load)
+
+    def load(self, filename):
+        """Load variants from a VCF file.
+
+        Header lines (starting with ``#``) and blank lines are skipped.
+        Files whose name ends with ``.gz`` are read through :mod:`gzip`.
+
+        :param filename: Define the filename for loading a VCF file
+        :type filename: str
+        """
+        # Plain-text and gzip-compressed VCFs share the same parsing loop.
+        opener = gzip.open if str(filename).endswith('.gz') else open
+        with opener(filename, 'rt') as vcf_file:
+            for line in vcf_file:
+                line = line.strip()
+                if not line or line.startswith('#'):
+                    continue  # Skip blank and header lines
+                fields = line.split('\t')
+                chrom = fields[0]
+                pos = int(fields[1])
+                vid = fields[2]
+                ref = fields[3]
+                alt = fields[4]
+                qual = fields[5]
+                filter_status = fields[6]
+                info = fields[7:]
+                variant = GVariant(chrom=chrom, pos=pos, vid=vid, ref=ref,
+                                   alt=alt, qual=qual,
+                                   filter_status=filter_status,
+                                   info=info)
+                self.variants.append(variant)
+
+    def to_GRegions(self):
+        """Convert the GVariants to a GRegions object.
+
+        :return: A GRegions object
+        :rtype: GRegions
+        """
+        from genomkit import GRegions
+        regions = GRegions(name=self.name)
+        for variant in self.variants:
+            regions.add(variant.to_GRegion())
+        return regions