From db892b0a750c15d8b9b50e34f0c7df526610aff8 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Sat, 8 Jun 2024 21:52:59 +0000 Subject: [PATCH] build based on 9bd6c66 --- dev/404.html | 2 +- dev/api.html | 2 +- dev/article01.html | 2 +- dev/assets/{index.md.C2Xkp6cL.js => index.md.CX17Dqia.js} | 2 +- ...{index.md.C2Xkp6cL.lean.js => index.md.CX17Dqia.lean.js} | 2 +- dev/biomarkovchains.html | 2 +- dev/getstarted.html | 2 +- dev/hashmap.json | 2 +- dev/home.html | 2 +- dev/index.html | 6 +++--- 10 files changed, 12 insertions(+), 12 deletions(-) rename dev/assets/{index.md.C2Xkp6cL.js => index.md.CX17Dqia.js} (84%) rename dev/assets/{index.md.C2Xkp6cL.lean.js => index.md.CX17Dqia.lean.js} (87%) diff --git a/dev/404.html b/dev/404.html index 9800aaf..ab8feca 100644 --- a/dev/404.html +++ b/dev/404.html @@ -15,7 +15,7 @@
- + \ No newline at end of file diff --git a/dev/api.html b/dev/api.html index dd24a61..6d21e5f 100644 --- a/dev/api.html +++ b/dev/api.html @@ -75,7 +75,7 @@ 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0

source


- + \ No newline at end of file diff --git a/dev/article01.html b/dev/article01.html index cc6d38c..4dff6d2 100644 --- a/dev/article01.html +++ b/dev/article01.html @@ -36,7 +36,7 @@ 0.43478260869565216 0.34782608695652173 0.13043478260869565
- + \ No newline at end of file diff --git a/dev/assets/index.md.C2Xkp6cL.js b/dev/assets/index.md.CX17Dqia.js similarity index 84% rename from dev/assets/index.md.C2Xkp6cL.js rename to dev/assets/index.md.CX17Dqia.js index 8e63a95..dff10aa 100644 --- a/dev/assets/index.md.C2Xkp6cL.js +++ b/dev/assets/index.md.CX17Dqia.js @@ -1 +1 @@ -import{_ as e,c as a,o as t,a6 as i}from"./chunks/framework.Cvv1S4VC.js";const g=JSON.parse('{"title":"","description":"","frontmatter":{"layout":"home","hero":{"name":"BioMarkovChains.jl","text":"Representing biological sequences as Markov chains","tagline":null,"image":{"src":"/logo.png","alt":"BioMarkovChains"},"actions":[{"theme":"brand","text":"Get Started","link":"/getstarted.md"},{"theme":"alt","text":"View on Github","link":"https://github.com/camilogarciabotero/BioMarkovChains.jl"}]}},"headers":[],"relativePath":"index.md","filePath":"index.md","lastUpdated":null}'),s={name:"index.md"},n=i('

Install BioMarkovChains from the julia REPL. Press ] to enter pkg mode, and enter the add BioMarkovChains command.

julia
(@v1.10) pkg> add BioMarkovChains

For more information, see the Get Started section.

',3),o=[n];function d(l,r,h,c,p,k){return t(),a("div",null,o)}const m=e(s,[["render",d]]);export{g as __pageData,m as default}; +import{_ as e,c as a,o as t,a6 as i}from"./chunks/framework.Cvv1S4VC.js";const _=JSON.parse('{"title":"","description":"","frontmatter":{"layout":"home","hero":{"name":"BioMarkovChains.jl","text":"Representing biological sequences as Markov chains","tagline":null,"image":{"src":"/logo.png","alt":"BioMarkovChains"},"actions":[{"theme":"brand","text":"Get Started","link":"/getstarted.md"},{"theme":"alt","text":"View on Github","link":"https://github.com/camilogarciabotero/BioMarkovChains.jl"}]}},"headers":[],"relativePath":"index.md","filePath":"index.md","lastUpdated":null}'),s={name:"index.md"},n=i('

Install BioMarkovChains from the julia REPL. Press ] to enter pkg mode, and enter the add BioMarkovChains command.

julia
(@v1.10) pkg> add BioMarkovChains

For more information, see the Get Started section.

',3),o=[n];function d(l,r,h,c,p,k){return t(),a("div",null,o)}const m=e(s,[["render",d]]);export{_ as __pageData,m as default}; diff --git a/dev/assets/index.md.C2Xkp6cL.lean.js b/dev/assets/index.md.CX17Dqia.lean.js similarity index 87% rename from dev/assets/index.md.C2Xkp6cL.lean.js rename to dev/assets/index.md.CX17Dqia.lean.js index 381b6d3..7ec2c39 100644 --- a/dev/assets/index.md.C2Xkp6cL.lean.js +++ b/dev/assets/index.md.CX17Dqia.lean.js @@ -1 +1 @@ -import{_ as e,c as a,o as t,a6 as i}from"./chunks/framework.Cvv1S4VC.js";const g=JSON.parse('{"title":"","description":"","frontmatter":{"layout":"home","hero":{"name":"BioMarkovChains.jl","text":"Representing biological sequences as Markov chains","tagline":null,"image":{"src":"/logo.png","alt":"BioMarkovChains"},"actions":[{"theme":"brand","text":"Get Started","link":"/getstarted.md"},{"theme":"alt","text":"View on Github","link":"https://github.com/camilogarciabotero/BioMarkovChains.jl"}]}},"headers":[],"relativePath":"index.md","filePath":"index.md","lastUpdated":null}'),s={name:"index.md"},n=i("",3),o=[n];function d(l,r,h,c,p,k){return t(),a("div",null,o)}const m=e(s,[["render",d]]);export{g as __pageData,m as default}; +import{_ as e,c as a,o as t,a6 as i}from"./chunks/framework.Cvv1S4VC.js";const _=JSON.parse('{"title":"","description":"","frontmatter":{"layout":"home","hero":{"name":"BioMarkovChains.jl","text":"Representing biological sequences as Markov chains","tagline":null,"image":{"src":"/logo.png","alt":"BioMarkovChains"},"actions":[{"theme":"brand","text":"Get Started","link":"/getstarted.md"},{"theme":"alt","text":"View on Github","link":"https://github.com/camilogarciabotero/BioMarkovChains.jl"}]}},"headers":[],"relativePath":"index.md","filePath":"index.md","lastUpdated":null}'),s={name:"index.md"},n=i("",3),o=[n];function d(l,r,h,c,p,k){return t(),a("div",null,o)}const m=e(s,[["render",d]]);export{_ as __pageData,m as default}; diff --git a/dev/biomarkovchains.html b/dev/biomarkovchains.html 
index 05ccd6d..330a841 100644 --- a/dev/biomarkovchains.html +++ b/dev/biomarkovchains.html @@ -18,7 +18,7 @@
Skip to content

Towards Markov Chains

DNA as a Markov chain

Several packages (e.g. MarkovChainsHammer.jl, DiscreteMarkovChains.jl) in the Julia ecosystem have been implemented to work with Markov chains with a state space of integers. Those can be efficient in many ways, but they are clumsy to use with specialized biological types such as those in the BioJulia ecosystem. Therefore, in the GeneFinder package we dedicated some implementations to work with BioSequence types so that we can expand the functionality in an efficient way (see complete API).

One important step towards many gene finding algorithms is to represent a DNA sequence as a Markov chain. In this representation a DNA sequence over a reduced alphabet $\mathscr{A} = \{A, C, G, T\}$ is drawn as a four-vertex graph, where each letter of $\mathscr{A}$ is a state (vertex) and the edges of the graph represent transitions from one nucleotide to another in a sequence (e.g. $A \rightarrow T$ represents a single nucleotide-to-nucleotide transition). This is also considered more specifically as a Discrete Markov chain (Axelson-Fisk 2015). The graph thus captures the complete set of transitions and states of a DNA sequence over the alphabet $\mathscr{A}$.

More formally, a Markov chain is a random process where each state is a random variable $X_t$, where $t \in T$ is a discrete time in a finite sequence $T$, and the probability of jumping from one state to another depends only on the current state. Therefore, a definition of this Markov property is given by:

$$P(X_t = j \mid X_{t-1} = i) \tag{1}$$

where $i, j \in \mathscr{A}$. This property leads us to a general way of calculating the probability of a sequence $T$ from a process $(X_1 \ldots X_T)$ where each random variable is a nucleotide from $\mathscr{A}$, so that:

$$P(X_1 = i_1, \ldots, X_T = i_T) = P(X_1 = i_1) \prod_{t=2}^{T} P(X_t = i_t \mid X_{t-1} = i_{t-1}) \tag{2}$$

Note that the previous equation has two terms: an initial probability $P(X_1 = i_1)$ and the product of all transitions beginning at $t = 2$. So, to calculate the initial probability distribution of each of the nucleotides of a string $T$ with the alphabet $\mathscr{A}$, we can first calculate the transition probability matrix $\widehat{\mathscr{M}}$ out of the frequency count of the transitions. In an alphabet $\mathscr{A}$ we get $4^2$ first-order transitions, that is $AA, AC, AG, \ldots$, which coincide with the frequency of the dinucleotides in the sequence. So we can later in fact build a 4x4 matrix representing all the transitions. For instance, in a DNA sequence $T$ of 24 nucleotides:

CCTCCCGGACCCTGGGCTCGGGAC

We can calculate the frequency of each transition from one nucleotide to any other nucleotide, $\widehat{m}_{ij} = \frac{c_{ij}}{c_i}$, where $c_{ij}$ is the actual count of the dinucleotide $ij$ and $c_i$ is the count of transitions from nucleotide $i$ to any other nucleotide, and build the transition probability matrix:

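$$
\widehat{\mathscr{M}} =
\begin{bmatrix}
           & \text{A} & \text{C} & \text{G} & \text{T} \\
\text{A} & 0.00 & 1.00 & 0.00 & 0.00 \\
\text{C} & 0.00 & 0.56 & 0.22 & 0.30 \\
\text{G} & 0.25 & 0.12 & 0.62 & 0.00 \\
\text{T} & 0.00 & 0.67 & 0.33 & 0.00
\end{bmatrix}
$$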

It is noteworthy that the initial probabilities can also be obtained from the count of transitions out of each nucleotide, $c_i = \sum_j c_{ij}$, over the total sum of the dinucleotide counts, $\sum_k c_k$:

$$\widehat{\pi}_i = \frac{c_i}{\sum_{k} c_k}$$

That way, for the previous example we can calculate the initial probabilities $\widehat{\pi} = (0.08, 0.43, 0.34, 0.13)$. Together, both sets of probabilities compose a transition model that can be used to predict the probability of any DNA sequence using equation (2).

References

Axelson-Fisk, Marina. 2015. Comparative Gene Finding. Vol. 20. Computational Biology. London: Springer London. http://link.springer.com/10.1007/978-1-4471-6693-1.

- + \ No newline at end of file diff --git a/dev/getstarted.html b/dev/getstarted.html index 228b378..c0a77b1 100644 --- a/dev/getstarted.html +++ b/dev/getstarted.html @@ -36,7 +36,7 @@ 0.0833 0.375 0.3417 0.2 - Initial Probabilities -> Vector{Float64}(4 × 1): 0.087 0.4348 0.3478 0.1304 - + \ No newline at end of file diff --git a/dev/hashmap.json b/dev/hashmap.json index 3717b64..b22ec80 100644 --- a/dev/hashmap.json +++ b/dev/hashmap.json @@ -1 +1 @@ -{"article01.md":"BrGXw77Y","index.md":"C2Xkp6cL","getstarted.md":"CT3qMIGg","biomarkovchains.md":"0rg0z1ev","home.md":"DYCAAGxb","api.md":"BiqC9tVJ"} +{"article01.md":"BrGXw77Y","index.md":"CX17Dqia","home.md":"DYCAAGxb","biomarkovchains.md":"0rg0z1ev","getstarted.md":"CT3qMIGg","api.md":"BiqC9tVJ"} diff --git a/dev/home.html b/dev/home.html index 4b57c44..01dc1b3 100644 --- a/dev/home.html +++ b/dev/home.html @@ -18,7 +18,7 @@
Skip to content


Representing biological sequences as Markov chains


Overview

This package aims to represent BioSequence types as Markov chains in order to perform different operations and predictions.

- + \ No newline at end of file diff --git a/dev/index.html b/dev/index.html index 73125a5..12030b5 100644 --- a/dev/index.html +++ b/dev/index.html @@ -12,13 +12,13 @@ - + -
Skip to content

BioMarkovChains.jl

Representing biological sequences as Markov chains

BioMarkovChains

Install BioMarkovChains from the julia REPL. Press ] to enter pkg mode, and enter the add BioMarkovChains command.

julia
(@v1.10) pkg> add BioMarkovChains

For more information, see the Get Started section.

- +
Skip to content

BioMarkovChains.jl

Representing biological sequences as Markov chains

BioMarkovChains

Install BioMarkovChains from the julia REPL. Press ] to enter pkg mode, and enter the add BioMarkovChains command.

julia
(@v1.10) pkg> add BioMarkovChains

For more information, see the Get Started section.

+ \ No newline at end of file