From 4dbe694b044d6634de500a488846f0a03a8f235f Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 04:38:12 +0000 Subject: [PATCH 1/7] DOC: add docstring for `make_model` --- src/piqtree/model/_model.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/piqtree/model/_model.py b/src/piqtree/model/_model.py index d209ad5..d75b02b 100644 --- a/src/piqtree/model/_model.py +++ b/src/piqtree/model/_model.py @@ -92,6 +92,18 @@ def invariant_sites(self) -> bool: def make_model(iqtree_str: str) -> Model: + """Convert an IQ-TREE model specification into a Model class. + + Parameters + ---------- + iqtree_str : str + The IQ-TREE model string. + + Returns + ------- + Model + The equivalent Model class. + """ if "+" not in iqtree_str: return Model(iqtree_str) From 30905e2fd060814444b603b679182607fab5e7f3 Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 04:49:02 +0000 Subject: [PATCH 2/7] DOC: add docstrings for model_finder and result --- src/piqtree/iqtree/_model_finder.py | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/piqtree/iqtree/_model_finder.py b/src/piqtree/iqtree/_model_finder.py index 20a25e4..f21b0e7 100644 --- a/src/piqtree/iqtree/_model_finder.py +++ b/src/piqtree/iqtree/_model_finder.py @@ -46,6 +46,24 @@ def from_string(cls, val: str) -> "ModelResultValue": @dataclasses.dataclass(slots=True) class ModelFinderResult: + """Data returned by ModelFinder. + + Attributes + ---------- + source: str + Source of the alignment. + raw_data: dict[str, Any] + Raw data returned by ModelFinder. + best_aic: Model + The best AIC model. + best_aicc: Model + The best AICc model. + best_bic: Model + The best BIC model. + model_stats: + Semi-processed representation of raw_data. 
+ """ + source: str raw_data: dataclasses.InitVar[dict[str, Any]] best_aic: Model = dataclasses.field(init=False) @@ -104,6 +122,34 @@ def model_finder( rand_seed: int | None = None, num_threads: int | None = None, ) -> ModelFinderResult | c3_types.SerialisableType: + """Find the models of best fit for an alignment. + + _extended_summary_ + + Parameters + ---------- + aln : c3_types.AlignedSeqsType + The alignment to find the model of best fit for. + model_set : Iterable[str] | None, optional + Search space for models. + Equivalent to IQ-TREE's mset parameter, by default None + freq_set : Iterable[str] | None, optional + Search space for frequency types. + Equivalent to IQ-TREE's mfreq parameter, by default None + rate_set : Iterable[str] | None, optional + Search space for rate heterogeneity types. + Equivalent to IQ-TREE's mrate parameter, by default None + rand_seed : int | None, optional + The random seed - 0 or None means no seed, by default None. + num_threads: int | None, optional + Number of threads for IQ-TREE 2 to use, by default None (single-threaded). + If 0 is specified, IQ-TREE attempts to find the optimal number of threads. + + Returns + ------- + ModelFinderResult | c3_types.SerialisableType + Collection of data returned from IQ-TREE's ModelFinder. 
+ """ source = aln.info.source if rand_seed is None: rand_seed = 0 # The default rand_seed in IQ-TREE From bcf09317ea6911d1eec74775643ea2b0e0d34d8d Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 04:52:50 +0000 Subject: [PATCH 3/7] DOC: fix template doc in `model_finder` and enhance description --- src/piqtree/iqtree/_model_finder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/piqtree/iqtree/_model_finder.py b/src/piqtree/iqtree/_model_finder.py index f21b0e7..72ffbd0 100644 --- a/src/piqtree/iqtree/_model_finder.py +++ b/src/piqtree/iqtree/_model_finder.py @@ -122,9 +122,7 @@ def model_finder( rand_seed: int | None = None, num_threads: int | None = None, ) -> ModelFinderResult | c3_types.SerialisableType: - """Find the models of best fit for an alignment. - - _extended_summary_ + """Find the models of best fit for an alignment using ModelFinder. Parameters ---------- From 0e405f207588b5b1b7418b41eda7ee304a9ae059 Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 04:53:29 +0000 Subject: [PATCH 4/7] DOC: add `ModelFinderResult` to api --- docs/api/index.md | 1 + docs/api/model/ModelFinderResult.md | 3 +++ src/piqtree/__init__.py | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 docs/api/model/ModelFinderResult.md diff --git a/docs/api/index.md b/docs/api/index.md index 25cb75a..28e0f71 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -14,6 +14,7 @@ | Name | Summary | |------|---------| | [model_finder](model/model_finder.md) | Determine the best-fit model for your data. | +| [ModelFinderResult](model/ModelFinderResult.md) | Collection of data returned by IQ-TREE's ModelFinder. | | [Model](model/Model.md) | Class for substitution models. | | [SubstitutionModel](model/SubstitutionModel.md) | Enums for substitution models. | | [FreqType](model/FreqType.md) | Enum for base frequencies. 
| diff --git a/docs/api/model/ModelFinderResult.md b/docs/api/model/ModelFinderResult.md new file mode 100644 index 0000000..99f6b24 --- /dev/null +++ b/docs/api/model/ModelFinderResult.md @@ -0,0 +1,3 @@ +# ModelFinderResult + +::: piqtree.ModelFinderResult diff --git a/src/piqtree/__init__.py b/src/piqtree/__init__.py index e3afee7..9905f7a 100644 --- a/src/piqtree/__init__.py +++ b/src/piqtree/__init__.py @@ -4,6 +4,7 @@ from piqtree._data import dataset_names, download_dataset from piqtree.iqtree import ( + ModelFinderResult, TreeGenMode, build_tree, fit_tree, @@ -25,6 +26,7 @@ __all__ = [ "Model", + "ModelFinderResult", "TreeGenMode", "__iqtree_version__", "available_freq_type", From 838a371f7de8331c38a44b00baacec16905f1a5a Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 04:56:10 +0000 Subject: [PATCH 5/7] DOC: add make_model to API --- docs/api/index.md | 1 + docs/api/model/make_model.md | 7 +++++++ mkdocs.yml | 2 ++ 3 files changed, 10 insertions(+) create mode 100644 docs/api/model/make_model.md diff --git a/docs/api/index.md b/docs/api/index.md index 28e0f71..6e8356c 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -16,6 +16,7 @@ | [model_finder](model/model_finder.md) | Determine the best-fit model for your data. | | [ModelFinderResult](model/ModelFinderResult.md) | Collection of data returned by IQ-TREE's ModelFinder. | | [Model](model/Model.md) | Class for substitution models. | +| [make_model](model/make_model.md) | Function to construct Model classes from IQ-TREE strings. | | [SubstitutionModel](model/SubstitutionModel.md) | Enums for substitution models. | | [FreqType](model/FreqType.md) | Enum for base frequencies. | | [RateModel](model/RateModel.md) | Classes for rate heterogeneity. 
| diff --git a/docs/api/model/make_model.md b/docs/api/model/make_model.md new file mode 100644 index 0000000..4d3247b --- /dev/null +++ b/docs/api/model/make_model.md @@ -0,0 +1,7 @@ +# make_model + +::: piqtree.make_model + +## Usage + +For usage, see ["Use different kinds of substitution models"](../../quickstart/using_substitution_models.md). diff --git a/mkdocs.yml b/mkdocs.yml index 6f166a8..079f3fc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,7 +61,9 @@ nav: - api/tree/random_trees.md - Substitution Models: - api/model/model_finder.md + - api/model/ModelFinderResult.md - api/model/Model.md + - api/model/make_model.md - api/model/SubstitutionModel.md - api/model/FreqType.md - api/model/RateModel.md From 79a0d397d2ca97a6d8bad7d7eef76391f5c6c1b9 Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 05:00:02 +0000 Subject: [PATCH 6/7] DOC: add quickstart for `make_model` --- docs/quickstart/using_substitution_models.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/quickstart/using_substitution_models.md b/docs/quickstart/using_substitution_models.md index 7c5adaa..465b682 100644 --- a/docs/quickstart/using_substitution_models.md +++ b/docs/quickstart/using_substitution_models.md @@ -107,6 +107,16 @@ sym_discrete_gamma_4 = Model("SYM", rate_model=FreeRateModel()) sym_invar_discrete_gamma_8 = Model("SYM", rate_model=FreeRateModel(8), invariant_sites=True) ``` +### Making Model Classes from IQ-TREE Strings + +For the supported model types, the Model class can be created by using [`make_model`](../api/model/make_model.md) on the IQ-TREE string representation of the model. + +```python +from piqtree import make_model + +model = make_model("GTR+FQ+I+R3") +``` + ## See also - Use a [`Model`](../api/model/Model.md) to construct a maximum likelihood tree: ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md). 
From fa92e9fd4ef48827f60336520ed661785008cc1c Mon Sep 17 00:00:00 2001 From: Robert McArthur Date: Fri, 13 Dec 2024 05:18:39 +0000 Subject: [PATCH 7/7] DOC: add quickstart for model_finder --- docs/api/model/ModelFinderResult.md | 4 ++ docs/api/model/model_finder.md | 4 ++ docs/quickstart/construct_ml_tree.md | 1 + docs/quickstart/using_model_finder.md | 85 ++++++++++++++++++++++++++- 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/docs/api/model/ModelFinderResult.md b/docs/api/model/ModelFinderResult.md index 99f6b24..9a61b38 100644 --- a/docs/api/model/ModelFinderResult.md +++ b/docs/api/model/ModelFinderResult.md @@ -1,3 +1,7 @@ # ModelFinderResult ::: piqtree.ModelFinderResult + +## Usage + +For usage, see ["Find the model of best fit with ModelFinder"](../../quickstart/using_model_finder.md). diff --git a/docs/api/model/model_finder.md b/docs/api/model/model_finder.md index 77770e7..7f31db7 100644 --- a/docs/api/model/model_finder.md +++ b/docs/api/model/model_finder.md @@ -1,3 +1,7 @@ # model_finder ::: piqtree.model_finder + +## Usage + +For usage, see ["Find the model of best fit with ModelFinder"](../../quickstart/using_model_finder.md). diff --git a/docs/quickstart/construct_ml_tree.md b/docs/quickstart/construct_ml_tree.md index 62cff23..e43a133 100644 --- a/docs/quickstart/construct_ml_tree.md +++ b/docs/quickstart/construct_ml_tree.md @@ -72,4 +72,5 @@ tree = build_tree(aln, model, num_threads=4) ## See also - For how to specify a `Model`, see ["Use different kinds of substitution models"](using_substitution_models.md). +- For selecting the best `Model`, see ["Find the model of best fit with ModelFinder"](using_model_finder.md). - For fitting branch lengths to a tree topology see ["Fit branch lengths to a tree topology from an alignment"](fit_tree_topology.md). 
diff --git a/docs/quickstart/using_model_finder.md b/docs/quickstart/using_model_finder.md index 9516fd4..3c0de1f 100644 --- a/docs/quickstart/using_model_finder.md +++ b/docs/quickstart/using_model_finder.md @@ -1,3 +1,86 @@ # Find the model of best fit with ModelFinder -⚠️ This page is under construction ⚠️ +IQ-TREE's ModelFinder can be used to automatically find the model of best fit for an alignment using [`model_finder`](../api/model/model_finder.md). +The best scoring model under the *Akaike information criterion* (AIC), the *corrected Akaike information criterion* (AICc), or the *Bayesian information criterion* (BIC) can be selected. + +## Usage + +### Basic Usage + +Construct a `cogent3` alignment object, then use `model_finder` to find the models of best fit. + +```python +from cogent3 import load_aligned_seqs +from piqtree import model_finder + +aln = load_aligned_seqs("my_alignment.fasta", moltype="dna") + +result = model_finder(aln) + +best_aic_model = result.best_aic +best_aicc_model = result.best_aicc +best_bic_model = result.best_bic +``` + +### Specifying the Search Space + +We expose the `mset`, `mfreq` and `mrate` parameters from IQ-TREE's ModelFinder to specify the substitution model search space, base frequency search space, and rate heterogeneity search space respectively. They can be specified as a set of strings in either `model_set`, `freq_set` or `rate_set` respectively. + +```python +from cogent3 import load_aligned_seqs +from piqtree import model_finder + +aln = load_aligned_seqs("my_alignment.fasta", moltype="dna") + +result = model_finder(aln, model_set={"HKY", "TIM"}) + +best_aic_model = result.best_aic +best_aicc_model = result.best_aicc +best_bic_model = result.best_bic +``` + +### Reproducible Results + +For reproducible results, a random seed may be specified. +> **Caution:** 0 and None are equivalent to no random seed being specified. 
+ +```python +from cogent3 import load_aligned_seqs +from piqtree import model_finder + +aln = load_aligned_seqs("my_alignment.fasta", moltype="dna") + +result = model_finder(aln, rand_seed=5) + +best_aic_model = result.best_aic +best_aicc_model = result.best_aicc +best_bic_model = result.best_bic +``` + +### Multithreading + +To speed up computation, the number of threads to be used may be specified. +By default, the computation is done on a single thread. If 0 is specified, +then IQ-TREE attempts to determine the optimal number of threads. + +> **Caution:** If 0 is specified with small datasets, the time to determine the +> optimal number of threads may exceed the time to find the model of best +> fit. + +```python +from cogent3 import load_aligned_seqs +from piqtree import model_finder + +aln = load_aligned_seqs("my_alignment.fasta", moltype="dna") + +result = model_finder(aln, num_threads=4) + +best_aic_model = result.best_aic +best_aicc_model = result.best_aicc +best_bic_model = result.best_bic +``` + +## See also + +- For constructing a maximum likelihood tree, see ["Construct a maximum likelihood phylogenetic tree"](construct_ml_tree.md). +- For how to specify a `Model`, see ["Use different kinds of substitution models"](using_substitution_models.md).