diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..7d38fb3 --- /dev/null +++ b/404.html @@ -0,0 +1,545 @@ + + + +
+ + + + + + + + + + + + + + +pypythia.custom_errors.PyPythiaException
+
+
+
+ Bases: Exception
pypythia/custom_errors.py
4 +5 |
|
pypythia.custom_errors.RAxMLNGError
+
+
+
+ Bases: Exception
Custom RAxML-NG Exception used when running RAxML-NG commands. +In case of a subprocess.CalledProcessError, the output of this Exception is either the entire RAxML-NG output, +or only the lines containing the cause of the RAxML-NG error if the RAxML-NG output contains +lines starting with "ERROR".
+ + + + + + +pypythia/custom_errors.py
8 + 9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 |
|
pypythia.custom_types.DataType
+
+
+
+ Bases: Enum
Data type for MSAs. +- DNA = DNA data +- AA = Protein data +- MORPH = morphological data
+ + + + + + +pypythia/custom_types.py
5 + 6 + 7 + 8 + 9 +10 +11 +12 +13 +14 |
|
pypythia.custom_types.FileFormat
+
+
+
+ Bases: Enum
File formats for MSAs +- PHYLIP = phylip-relaxed +- FASTA = fasta
+ + + + + + +pypythia/custom_types.py
17 +18 +19 +20 +21 +22 +23 +24 |
|
pypythia.logger.get_header()
+
+pypythia/logger.py
18 +19 +20 +21 +22 +23 +24 |
|
pypythia.logger.log_runtime_information(message, log_runtime=True)
+
+pypythia/logger.py
27 +28 +29 +30 +31 +32 +33 +34 |
|
pypythia.main.main()
+
+pypythia/main.py
154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 |
|
pypythia.msa.MSA
+
+
+pypythia/msa.py
134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 |
|
get_raxmlng_model()
+
+Returns a RAxML-NG model string based on the data type
+ + +Returns:
+Name | Type | +Description | +
---|---|---|
model_string |
+ string
+ |
+
+
+
+ RAxML-NG model string +For DNA data: GTR+G +For Protein (AA) data: LG+G +For morphological data: MULTIx_GTR where x refers to the maximum state value in the alignment + |
+
pypythia/msa.py
224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 |
|
pypythia.msa.parse(msa_file, file_format=None, data_type=None)
+
+pypythia/msa.py
255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 |
|
pypythia.msa.remove_full_gap_sequences(msa, msa_name=None)
+
+pypythia/msa.py
285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 |
|
pypythia.msa.deduplicate_sequences(msa, msa_name=None)
+
+pypythia/msa.py
298 +299 +300 +301 +302 +303 +304 +305 +306 +307 |
|
pypythia.prediction.predict_difficulty(msa_file, model_file=DEFAULT_MODEL_FILE, raxmlng=DEFAULT_RAXMLNG_EXE, threads=None, seed=0, deduplicate=True, remove_full_gaps=True, reduced_msa_file=None)
+
+Predicts the difficulty of an MSA using the given difficulty predictor.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ msa_file
+ |
+
+ FilePath
+ |
+
+
+
+ Path to the MSA file the difficulty should be predicted for. The file must be either in "fasta" or "phylip" format. + |
+ + required + | +
+ model_file
+ |
+
+ FilePath
+ |
+
+
+
+ Path to a trained difficulty predictor. + |
+
+ DEFAULT_MODEL_FILE
+ |
+
+ raxmlng
+ |
+
+ Executable
+ |
+
+
+
+ Path to an executable of RAxML-NG. See https://github.com/amkozlov/raxml-ng for install instructions. + |
+
+ DEFAULT_RAXMLNG_EXE
+ |
+
+ threads
+ |
+
+ int
+ |
+
+
+
+ The number of threads to use for parallel parsimony tree inference. Uses the RAxML-NG auto parallelization scheme if none is set. + |
+
+ None
+ |
+
+ seed
+ |
+
+ int
+ |
+
+
+
+ Seed for the RAxML-NG parsimony tree inference. Default is 0. + |
+
+ 0
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
difficulty |
+ float
+ |
+
+
+
+ The predicted difficulty for the given MSA. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the file format of the given MSA is not FASTA or PHYLIP. + |
+
+ ValueError
+ |
+
+
+
+ If the data type of the given MSA cannot be inferred. + |
+
+ PyPythiaException
+ |
+
+
+
+ If the provided difficulty predictor was trained with a subset incompatible with Pythia. + |
+
pypythia/prediction.py
16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 |
|
pypythia.prediction.collect_features(msa, msa_file, raxmlng, pars_trees_file=None, log_info=True, threads=None, seed=0)
+
+Helper function to collect all features required for predicting the difficulty of the MSA.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ msa
+ |
+
+ MSA
+ |
+
+
+
+ MSA object corresponding to the MSA file to compute the features for. + |
+ + required + | +
+ raxmlng
+ |
+
+ RAxMLNG
+ |
+
+
+
+ Initialized RAxMLNG object. + |
+ + required + | +
+ store_trees
+ |
+
+ bool
+ |
+
+
+
+ If True, store the inferred parsimony trees as "{msa_name}.parsimony.trees" file in the current workdir. + |
+ + required + | +
+ log_info
+ |
+
+ bool
+ |
+
+
+
+ If True, log intermediate progress information using the default logger. + |
+
+ True
+ |
+
+ threads
+ |
+
+ int
+ |
+
+
+
+ The number of threads to use for parallel parsimony tree inference. Uses the RAxML-NG auto parallelization scheme if none is set. + |
+
+ None
+ |
+
Returns: + all_features (Dict): Dictionary containing all features required for predicting the difficulty of the MSA. The keys correspond to the feature names the predictor was trained with.
+ +pypythia/prediction.py
71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 |
|
pypythia.predictor.DifficultyPredictor
+
+
+Class structure for the trained difficulty predictor.
+This class provides methods for predicting the difficulty of an MSA.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predictor_file
+ |
+
+ file object
+ |
+
+
+
+ Open file handle for the trained predictor. We do not guarantee the functionality of this class +for predictors other than LightGBM Regressors and scikit-learn RandomForestRegressors. + |
+ + required + | +
+ features
+ |
+
+ optional list[string]
+ |
+
+
+
+ Names of the features the passed predictor was trained with. +If you are using a LightGBM-based predictor, the order of the features needs to be the same +as the order the predictor was trained with! +If no list is passed and the predictor is either a LightGBM Regressor or scikit-learn RandomForestRegressor, +the features will be automatically determined. +For any other predictor type, features cannot be None. + |
+
+ None
+ |
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
predictor |
+ + | +
+
+
+ Loaded trained predictor. + |
+
features |
+ + | +
+
+
+ Names of the features the predictor was trained with. + |
+
pypythia/predictor.py
16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 |
|
predict(query)
+
+Predicts the difficulty for the given set of MSA features. +TODO: adjust documentation -> also allows batch prediction!
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ query
+ |
+
+ Dict
+ |
+
+
+
+ Dict containing the features of the MSA to predict the difficulty for. +query needs to contain at least the features the predictor was trained with. +You can check this using the DifficultyPredictor.features attribute. + |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
difficulty |
+ float
+ |
+
+
+
+ The predicted difficulty for the given set of MSA features. + |
+
Raises:
+Type | +Description | +
---|---|
+ PyPythiaException
+ |
+
+
+
+ If not all features the predictor was trained with are present in the given query. + |
+
pypythia/predictor.py
61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 |
|
pypythia.raxmlng.RAxMLNG
+
+
+Class structure for features computed using RAxML-NG.
+This class provides methods for computing MSA attributes using RAxML-NG.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exe_path
+ |
+
+ Executable
+ |
+
+
+
+ Path to an executable of RAxML-NG. See https://github.com/amkozlov/raxml-ng for install instructions. + |
+
+ DEFAULT_RAXMLNG_EXE
+ |
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
exe_path |
+
+ Executable
+ |
+
+
+
+ Path to an executable of RAxML-NG. + |
+
pypythia/raxmlng.py
74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 |
|
get_rfdistance_results(trees_file, prefix=None, **kwargs)
+
+Method that computes the number of unique topologies, relative RF-Distance, and absolute RF-Distance for the given set of trees.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ trees_file
+ |
+
+ Path
+ |
+
+
+
+ Filepath of a file containing more than one Newick string. + |
+ + required + | +
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Optional prefix to use when running RAxML-NG + |
+
+ None
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
num_topos |
+ float
+ |
+
+
+
+ Number of unique topologies of the given set of trees. + |
+
rel_rfdist |
+ float
+ |
+
+
+
+ Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0. + |
+
abs_rfdist |
+ float
+ |
+
+
+
+ Absolute RF-Distance of the given set of trees. + |
+
pypythia/raxmlng.py
165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 |
|
infer_parsimony_trees(msa_file, model, prefix, n_trees=24, **kwargs)
+
+Method that infers n_trees using the RAxML-NG implementation of maximum parsimony.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ msa_file
+ |
+
+ str
+ |
+
+
+
+ Filepath of the MSA to compute the parsimony trees for. + |
+ + required + | +
+ model
+ |
+
+ str
+ |
+
+
+
+ String representation of the substitution model to use. Needs to be a valid RAxML-NG model. For example "GTR+G" for DNA data or "LG+G" for protein data. + |
+ + required + | +
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Prefix of where to store the RAxML-NG results. + |
+ + required + | +
+ n_trees
+ |
+
+ int
+ |
+
+
+
+ Number of parsimony trees to compute. + |
+
+ 24
+ |
+
+ **kwargs
+ |
+ + | +
+
+
+ Optional additional RAxML-NG settings. +The name of the kwarg needs to be a valid RAxML-NG flag. +For flags with a value pass it like this: "flag=value", for flags without a value pass it like this: "flag=None". +See https://github.com/amkozlov/raxml-ng for all options. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
output_trees_file |
+ str
+ |
+
+
+
+ Filepath pointing to the computed trees. + |
+
pypythia/raxmlng.py
135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 |
|
pypythia.raxmlng.run_raxmlng_command(cmd)
+
+pypythia/raxmlng.py
10 +11 +12 +13 +14 +15 +16 |
|