From 39d42a29c2693d5ce40cdc94eaa0f6aa29087d12 Mon Sep 17 00:00:00 2001
From: spiros
How many minutes TPOT has to optimize the pipeline.

max_eval_time_mins: float, optional (default=5)

From 1998df3e96a4094fc44a88abd7f1b65f4951109a Mon Sep 17 00:00:00 2001
From: Jan-Hendrik Menke
-If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse.
+If not None, this setting will allow TPOT to run until max_time_mins minutes have elapsed and then stop. TPOT will stop earlier if generations is set and all generations are already evaluated.
-Number of iterations to the run pipeline optimization process. Must be a positive number.
+Number of iterations to run the pipeline optimization process. Must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit.
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
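To make the new interplay between generations and max_time_mins concrete, here is a minimal sketch of a run driven purely by a time budget. It assumes a TPOT build that includes these patches; the dataset and parameter values are illustrative only.

```python
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, train_size=0.75, test_size=0.25, random_state=42)

# generations=None is now allowed as long as max_time_mins is set;
# omitting both raises a ValueError during _fit_init().
tpot = TPOTClassifier(generations=None, max_time_mins=10,
                      population_size=50, verbosity=2, random_state=42)
tpot.fit(X_train, y_train)

# If both limits are given, the run ends at whichever is hit first.
print(tpot.score(X_test, y_test))
```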
diff --git a/tpot/base.py b/tpot/base.py index 32a3b9e9..24186c75 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -566,9 +566,11 @@ def _fit_init(self): self.operators.append(op_class) self.arguments += arg_types + if self.max_time_mins is None and self.generations is None: + raise ValueError("Either the parameter generations should bet set or a maximum evaluation time should be defined via max_time_mins") + # Schedule TPOT to run for many generations if the user specifies a - # run-time limit TPOT will automatically interrupt itself when the timer - # runs out + # run-time limit TPOT will automatically interrupt itself when the timer runs out if self.max_time_mins is not None and self.generations is None : self.generations = 1000000 @@ -1261,7 +1263,7 @@ def _stop_by_max_time_mins(self): if self.max_time_mins: total_mins_elapsed = (datetime.now() - self._start_datetime).total_seconds() / 60. if total_mins_elapsed >= self.max_time_mins: - raise KeyboardInterrupt('{} minutes have elapsed. TPOT will close down.'.format(total_mins_elapsed)) + raise KeyboardInterrupt('{:.2f} minutes have elapsed. TPOT will close down.'.format(total_mins_elapsed)) def _combine_individual_stats(self, operator_count, cv_score, individual_stats): """Combine the stats with operator count and cv score and preprare to be written to _evaluated_individuals From 356115f6cdc39d3eb2d3e90d318f7ff608bda8a4 Mon Sep 17 00:00:00 2001 From: UvADate: Sun, 3 Nov 2019 11:43:58 +0100 Subject: [PATCH 21/44] export always returns pipeline as string and writing to file is optional --- tpot/base.py | 19 ++++++++----------- tpot/export_utils.py | 2 +- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/tpot/base.py b/tpot/base.py index 83a25e8d..f6276d2c 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -1088,24 +1088,21 @@ def _create_periodic_checkpoint_folder(self): raise ValueError('Failed creating the periodic_checkpoint_folder:\n{}'.format(e)) - def export(self, output_file_name='tpot_pipeline.py', data_file_path='', to_screen=False): + def export(self, output_file_name='', data_file_path=''): """Export the optimized pipeline as Python code. Parameters ---------- - output_file_name: string (default: 'tpot_pipeline.py') - String containing the path and file name of the desired output file + output_file_name: string (default: '') + String containing the path and file name of the desired output file. If left empty, writing to file will be skipped. data_file_path: string (default: '') By default, the path of input dataset is 'PATH/TO/DATA/FILE' by default. If data_file_path is another string, the path will be replaced. - to_screen: boolean (default: False) - If set to True, the full text of the export is printed to screen instead of to file. Returns ------- - False if it skipped writing the pipeline to file - True if the pipeline was actually written - + to_write: str + The whole pipeline text as a string. """ if self._optimized_pipeline is None: raise RuntimeError('A pipeline has not yet been optimized. Please call fit() first.') @@ -1116,11 +1113,11 @@ def export(self, output_file_name='tpot_pipeline.py', data_file_path='', to_scre self.random_state, data_file_path=data_file_path) - if to_screen: - print(to_write) - else: + if output_file_name is not '': with open(output_file_name, 'w') as output_file: output_file.write(to_write) + return to_write + def _impute_values(self, features): """Impute missing values in a feature set. 
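The export() change above can be exercised roughly as follows. This is a sketch against the patched API, in which export() always returns the pipeline source as a string and only writes a file when output_file_name is non-empty; the fitted tpot object and the file name are assumed for illustration.

```python
# Assumes `tpot` has already been fitted, e.g. as in the example above.
pipeline_code = tpot.export()                    # returns the code as a string, writes nothing
tpot.export(output_file_name='my_pipeline.py')   # also writes the same code to disk
print(pipeline_code)
```

Note that the guard `output_file_name is not ''` in the patch relies on identity comparison with a string literal, which CPython happens to make work through interning; `output_file_name != ''` would be the more robust test.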
diff --git a/tpot/export_utils.py b/tpot/export_utils.py index 5464a05b..0260a384 100644 --- a/tpot/export_utils.py +++ b/tpot/export_utils.py @@ -113,7 +113,7 @@ def export_pipeline(exported_pipeline, """ if pipeline_score is not None: - pipeline_text += '\n# Average CV score on the training set was:{}'.format(pipeline_score) + pipeline_text += '\n# Average CV score on the training set was: {}'.format(pipeline_score) pipeline_text += '\n' # Replace the function calls with their corresponding Python code From 4167dad67fcf41ddbb75c5996da4859a0080a448 Mon Sep 17 00:00:00 2001 From: UvA Date: Sun, 3 Nov 2019 11:50:15 +0100 Subject: [PATCH 22/44] added space also added in expected code --- tests/export_tests.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/export_tests.py b/tests/export_tests.py index a77429c4..ab153a2a 100644 --- a/tests/export_tests.py +++ b/tests/export_tests.py @@ -104,6 +104,38 @@ def test_export(): remove("test_export.py") # clean up exported file +def test_export_2(): + """Assert that TPOT's export function returns the expected pipeline text as a string.""" + + pipeline_string = ( + 'KNeighborsClassifier(' + 'input_matrix, ' + 'KNeighborsClassifier__n_neighbors=10, ' + 'KNeighborsClassifier__p=1, ' + 'KNeighborsClassifier__weights=uniform' + ')' + ) + pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) + tpot_obj._optimized_pipeline = pipeline + expected_code = """import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier + +# NOTE: Make sure that the class is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1).values +training_features, testing_features, training_target, testing_target = \\ + train_test_split(features, tpot_data['target'].values, random_state=None) + +exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") + +exported_pipeline.fit(training_features, training_target) +results = exported_pipeline.predict(testing_features) +""" + assert expected_code == tpot_obj.export() + + def test_generate_pipeline_code(): """Assert that generate_pipeline_code() returns the correct code given a specific pipeline.""" @@ -559,7 +591,7 @@ def test_pipeline_score_save(): training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'].values, random_state=None) -# Average CV score on the training set was:0.929813743 +# Average CV score on the training set was: 0.929813743 exported_pipeline = make_pipeline( SelectPercentile(score_func=f_classif, percentile=20), DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5) From 07852f1404364b354d075b76908bec5cecf6054e Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Mon, 4 Nov 2019 11:05:05 -0500 Subject: [PATCH 23/44] documents for new generations parameter #941 --- docs_sources/api.md | 10 +++++----- docs_sources/using.md | 6 +++--- tests/driver_tests.py | 24 +++++++++++++++++++++++- tpot/base.py | 9 ++++++--- tpot/driver.py | 38 +++++++++++++++++++++++++++++++++----- 5 files changed, 70 insertions(+), 17 deletions(-) diff --git a/docs_sources/api.md b/docs_sources/api.md index e4978b19..a1106417 100644 --- a/docs_sources/api.md +++ b/docs_sources/api.md @@ -32,9 +32,9 @@ Read more in the [User Guide](using/#tpot-with-code). 
Parameters:

-generations: int, optional (default=100)
+generations: int or None, optional (default=100)

-Number of iterations to the run pipeline optimization process. Must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit.
+Number of iterations to run the pipeline optimization process. It must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit.
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
@@ -524,9 +524,9 @@ Read more in the [User Guide](using/#tpot-with-code).

Parameters:

-generations: int, optional (default=100)
+generations: int or None, optional (default=100)

-Number of iterations to the run pipeline optimization process. Must be a positive number.
+Number of iterations to run the pipeline optimization process. It must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit.
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
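The same generations/max_time_mins contract applies to TPOTRegressor. A brief sketch under the same assumptions (a TPOT build carrying these patches, illustrative data and settings):

```python
from tpot import TPOTRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

housing = load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    housing.data, housing.target, train_size=0.75, test_size=0.25, random_state=42)

# A fixed number of generations and a wall-clock cap can be combined;
# the run ends at whichever limit is reached first.
tpot = TPOTRegressor(generations=20, max_time_mins=15, population_size=50,
                     verbosity=2, random_state=42)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
```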
@@ -608,7 +608,7 @@ Setting n_jobs=-1 will use as many cores as available on the computer.

How many minutes TPOT has to optimize the pipeline.

max_eval_time_mins: float, optional (default=5)

diff --git a/docs_sources/using.md b/docs_sources/using.md
index 41d3dfa2..a8a55785 100644
--- a/docs_sources/using.md
+++ b/docs_sources/using.md
@@ -170,8 +170,8 @@ Detailed descriptions of the command-line arguments are below.

-If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse.
+If not None, this setting will allow TPOT to run until max_time_mins minutes have elapsed and then stop. TPOT will stop earlier if generations is set and all generations are already evaluated.

@@ -248,7 +248,7 @@ Assigning this to -1 will use as many cores as available on the computer. For n_

 -g GENERATIONS
-Any positive integer
-Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.
+Any positive integer or None
+Number of iterations to run the pipeline optimization process. It must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline.

TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total.

 Any positive integer
 How many minutes TPOT has to optimize the pipeline.
+How many minutes TPOT has to optimize the pipeline. If not None, this setting will allow TPOT to run until max_time_mins minutes have elapsed and then stop. TPOT will stop earlier if generations is set and all generations are already evaluated.
-If provided, this setting will override the "generations" parameter and allow TPOT to run until it runs out of time.-maxeval diff --git a/tests/driver_tests.py b/tests/driver_tests.py index 99cebb8f..5532d960 100644 --- a/tests/driver_tests.py +++ b/tests/driver_tests.py @@ -38,7 +38,9 @@ import pandas as pd import sklearn -from tpot.driver import positive_integer, float_range, _get_arg_parser, _print_args, _read_data_file, load_scoring_function, tpot_driver +from tpot.driver import positive_integer, float_range, _get_arg_parser, \ + _print_args, _read_data_file, load_scoring_function, tpot_driver, \ + positive_integer_or_none from nose.tools import assert_raises, assert_equal, assert_in from unittest import TestCase @@ -359,6 +361,26 @@ def test_positive_integer_3(): """Assert that the TPOT CLI interface's integer parsing throws an exception when n is not an integer.""" assert_raises(Exception, positive_integer, 'foobar') +def test_positive_integer_or_none(): + """Assert that the TPOT CLI interface's positive_integer_or_none parsing throws an exception when n < 0.""" + assert_raises(Exception, positive_integer_or_none, '-1') + + +def test_positive_integer_or_none_2(): + """Assert that the TPOT CLI interface's positive_integer_or_none parsing returns the integer value of a string encoded integer when n > 0.""" + assert 1 == positive_integer_or_none('1') + + +def test_positive_integer_or_none_3(): + """Assert that the TPOT CLI interface's positive_integer_or_none parsing throws an exception when n is not an integer and not None.""" + assert_raises(Exception, positive_integer_or_none, 'foobar') + + +def test_positive_integer_or_none_4(): + """Assert that the TPOT CLI interface's positive_integer_or_none parsing return None when value is string 'None' or 'none'.""" + assert positive_integer_or_none('none') is None + assert positive_integer_or_none('None') is None + def test_float_range(): """Assert that the TPOT CLI interface's float range returns a float with input is in 0. - 1.0.""" diff --git a/tpot/base.py b/tpot/base.py index 8be71246..865f0620 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -119,8 +119,10 @@ def __init__(self, generations=100, population_size=100, offspring_size=None, Parameters ---------- - generations: int, optional (default: 100) + generations: int or None, optional (default: 100) Number of iterations to the run pipeline optimization process. + It must be a positive number or None. If None, the parameter + max_time_mins must be defined as the runtime limit. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. @@ -182,8 +184,9 @@ def __init__(self, generations=100, population_size=100, offspring_size=None, Thus for n_jobs = -2, all CPUs but one are used. max_time_mins: int, optional (default: None) How many minutes TPOT has to optimize the pipeline. - If provided, this setting will override the "generations" parameter and allow - TPOT to run until it runs out of time. + If not None, this setting will allow TPOT to run until max_time_mins minutes + elapsed and then stop. TPOT will stop earlier if generationsis set and all + generations are already evaluated. max_eval_time_mins: float, optional (default: 5) How many minutes TPOT has to optimize a single pipeline. 
Setting this parameter to higher values will allow TPOT to explore more diff --git a/tpot/driver.py b/tpot/driver.py index 09e60f5e..480fe251 100755 --- a/tpot/driver.py +++ b/tpot/driver.py @@ -42,7 +42,7 @@ def positive_integer(value): Parameters ---------- - value: int + value: string The number to evaluate Returns @@ -59,6 +59,31 @@ def positive_integer(value): return value +def positive_integer_or_none(value): + """Ensure that the provided value is a positive integer or None. + + Parameters + ---------- + value: string + The number to evaluate + + Returns + ------- + value: int or None + Returns a positive integer or None + """ + if value.lower() == 'none': + value = None + else: + try: + value = int(value) + except Exception: + raise argparse.ArgumentTypeError('Invalid int value: \'{}\''.format(value)) + if value < 0: + raise argparse.ArgumentTypeError('Invalid positive int value: \'{}\''.format(value)) + return value + + def float_range(value): """Ensure that the provided value is a float integer in the range [0., 1.]. @@ -152,9 +177,11 @@ def _get_arg_parser(): action='store', dest='GENERATIONS', default=100, - type=positive_integer, + type=positive_integer_or_none, help=( 'Number of iterations to run the pipeline optimization process. ' + 'It must be a positive number or None. If None, the parameter ' + 'max_time_mins must be defined as the runtime limit. ' 'Generally, TPOT will work better when you give it more ' 'generations (and therefore time) to optimize the pipeline. TPOT ' 'will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE ' @@ -308,9 +335,10 @@ def _get_arg_parser(): default=None, type=int, help=( - 'How many minutes TPOT has to optimize the pipeline. This setting ' - 'will override the GENERATIONS parameter and allow TPOT to run ' - 'until it runs out of time.' + 'How many minutes TPOT has to optimize the pipeline. ' + 'If not None, this setting will allow TPOT to run until max_time_mins minutes ' + 'elapsed and then stop. TPOT will stop earlier if generationsis set and all ' + 'generations are already evaluated. 
' ) ) From 4391536db086dbb03a816a75bc55659db567c8a2 Mon Sep 17 00:00:00 2001 From: weixuanfuDate: Mon, 4 Nov 2019 12:46:51 -0500 Subject: [PATCH 24/44] add Stochastic Gradient Descent into default tpot config --- tpot/config/classifier.py | 11 +++++++++++ tpot/config/regressor.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/tpot/config/classifier.py b/tpot/config/classifier.py index 1441e3f6..9f1e9f2a 100644 --- a/tpot/config/classifier.py +++ b/tpot/config/classifier.py @@ -107,6 +107,17 @@ 'nthread': [1] }, + 'sklearn.linear_model.SGDClassifier': { + 'loss': ['log', 'hinge', 'modified_huber', 'squared_hinge', 'perceptron'], + 'penalty': ['elasticnet'], + 'alpha': [0.0, 0.01, 0.001], + 'learning_rate': ['invscaling', 'constant'], + 'fit_intercept': [True, False], + 'l1_ratio': [0.25, 0.0, 1.0, 0.75, 0.5], + 'eta0': [0.1, 1.0, 0.01], + 'power_t': [0.5, 0.0, 1.0, 0.1, 100.0, 10.0, 50.0] + }, + # Preprocesssors 'sklearn.preprocessing.Binarizer': { 'threshold': np.arange(0.0, 1.01, 0.05) diff --git a/tpot/config/regressor.py b/tpot/config/regressor.py index 6c7aa3da..33ec7478 100644 --- a/tpot/config/regressor.py +++ b/tpot/config/regressor.py @@ -105,6 +105,17 @@ 'objective': ['reg:squarederror'] }, + 'sklearn.linear_model.SGDRegressor': { + 'loss': ['squared_loss', 'huber', 'epsilon_insensitive'], + 'penalty': ['elasticnet'], + 'alpha': [0.0, 0.01, 0.001] , + 'learning_rate': ['invscaling', 'constant'] , + 'fit_intercept': [True, False], + 'l1_ratio': [0.25, 0.0, 1.0, 0.75, 0.5], + 'eta0': [0.1, 1.0, 0.01], + 'power_t': [0.5, 0.0, 1.0, 0.1, 100.0, 10.0, 50.0] + }, + # Preprocesssors 'sklearn.preprocessing.Binarizer': { 'threshold': np.arange(0.0, 1.01, 0.05) From b91c22aec7797c24155a2941af685925cf3c145b Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Mon, 4 Nov 2019 14:56:50 -0500 Subject: [PATCH 25/44] refine random state in exported codes #933 --- tests/export_tests.py | 80 ++++++++++++++++++++++++++++++++++++++++--- tests/tpot_tests.py | 61 +-------------------------------- tpot/base.py | 45 ++++-------------------- tpot/export_utils.py | 62 +++++++++++++++++++++++++++++---- 4 files changed, 138 insertions(+), 110 deletions(-) diff --git a/tests/export_tests.py b/tests/export_tests.py index ab153a2a..5b715e30 100644 --- a/tests/export_tests.py +++ b/tests/export_tests.py @@ -28,7 +28,8 @@ from os import remove, path from tpot import TPOTClassifier, TPOTRegressor -from tpot.export_utils import export_pipeline, generate_import_code, _indent, generate_pipeline_code, get_by_name +from tpot.export_utils import export_pipeline, generate_import_code, _indent, \ + generate_pipeline_code, get_by_name, set_param_recursive from tpot.operator_utils import TPOTOperatorClassFactory from tpot.config.classifier import classifier_config_dict @@ -70,6 +71,7 @@ def test_export_random_ind(): import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import BernoulliNB +from tpot.export_utils import set_param_recursive # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) @@ -78,11 +80,15 @@ def test_export_random_ind(): train_test_split(features, tpot_data['target'].values, random_state=39) exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 39) exported_pipeline.fit(training_features, training_target) results = 
exported_pipeline.predict(testing_features) """ - assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state) + exported_code = export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state) + + assert expected_code == exported_code def test_export(): @@ -493,6 +499,7 @@ def test_export_pipeline_6(): import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier +from tpot.export_utils import set_param_recursive # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64) @@ -501,13 +508,17 @@ def test_export_pipeline_6(): train_test_split(features, tpot_data['target'].values, random_state=42) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """ - assert expected_code == export_pipeline(pipeline, tpot_obj.operators, - tpot_obj._pset, random_state=42, - data_file_path='test_path') + exported_code = export_pipeline(pipeline, tpot_obj.operators, + tpot_obj._pset, random_state=42, + data_file_path='test_path') + + assert expected_code == exported_code def test_operator_export(): @@ -657,3 +668,62 @@ def test_imputer_in_export(): """ assert_equal(export_code, expected_code) + + +def test_set_param_recursive(): + tpot_obj = TPOTClassifier() + tpot_obj._fit_init() + """Assert that _set_param_recursive sets \"random_state\" to 42 in all steps in a simple pipeline.""" + pipeline_string = ( + 'DecisionTreeClassifier(PCA(input_matrix, PCA__iterated_power=5, PCA__svd_solver=randomized), ' + 'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, ' + 'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)' + ) + + deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) + sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) + set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) + # assert "random_state" of PCA at step 1 + assert getattr(sklearn_pipeline.steps[0][1], 'random_state') == 42 + # assert "random_state" of DecisionTreeClassifier at step 2 + assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 + + +def test_set_param_recursive_2(): + """Assert that set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel.""" + pipeline_string = ( + 'DecisionTreeRegressor(SelectFromModel(input_matrix, ' + 'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, ' + 'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,' + 'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)' + ) + tpot_obj = TPOTRegressor() + tpot_obj._fit_init() + deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) + sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) + set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) + + assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42 + assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 + + +def test_set_param_recursive_3(): + """Assert that 
set_param_recursive sets \"random_state\" to 42 in nested estimator in StackingEstimator in a complex pipeline.""" + pipeline_string = ( + 'DecisionTreeClassifier(CombineDFs(' + 'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, ' + 'DecisionTreeClassifier__max_depth=8, DecisionTreeClassifier__min_samples_leaf=5,' + 'DecisionTreeClassifier__min_samples_split=5),input_matrix) ' + 'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, ' + 'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)' + ) + tpot_obj = TPOTClassifier() + tpot_obj._fit_init() + + deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) + sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) + set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) + + # StackingEstimator under the transformer_list of FeatureUnion + assert getattr(getattr(sklearn_pipeline.steps[0][1].transformer_list[0][1], 'estimator'), 'random_state') == 42 + assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index d7414c85..490628e4 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -556,7 +556,7 @@ def test_score_3(): """Assert that the TPOTRegressor score function outputs a known score for a fixed pipeline.""" tpot_obj = TPOTRegressor(scoring='neg_mean_squared_error', random_state=72) tpot_obj._fit_init() - known_score = -11.682841148312662 + known_score = -11.708199875921563 # Reify pipeline with known score pipeline_string = ( @@ -576,7 +576,6 @@ def test_score_3(): # Get score from TPOT score = tpot_obj.score(testing_features_r, testing_target_r) - assert np.allclose(known_score, score) @@ -1332,61 +1331,6 @@ def test_summary_of_best_pipeline(): assert_raises(RuntimeError, tpot_obj._summary_of_best_pipeline, features=training_features, target=training_target) -def test_set_param_recursive(): - """Assert that _set_param_recursive sets \"random_state\" to 42 in all steps in a simple pipeline.""" - pipeline_string = ( - 'DecisionTreeClassifier(PCA(input_matrix, PCA__iterated_power=5, PCA__svd_solver=randomized), ' - 'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, ' - 'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)' - ) - - deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) - sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) - # assert "random_state" of PCA at step 1 - assert getattr(sklearn_pipeline.steps[0][1], 'random_state') == 42 - # assert "random_state" of DecisionTreeClassifier at step 2 - assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 - - -def test_set_param_recursive_2(): - """Assert that _set_param_recursive sets \"random_state\" to 42 in nested estimator in SelectFromModel.""" - pipeline_string = ( - 'DecisionTreeRegressor(SelectFromModel(input_matrix, ' - 'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, ' - 'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,' - 'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)' - ) - tpot_obj = TPOTRegressor() - tpot_obj._fit_init() - deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) - sklearn_pipeline = 
tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) - - assert getattr(getattr(sklearn_pipeline.steps[0][1], 'estimator'), 'random_state') == 42 - assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 - - -def test_set_param_recursive_3(): - """Assert that _set_param_recursive sets \"random_state\" to 42 in nested estimator in StackingEstimator in a complex pipeline.""" - pipeline_string = ( - 'DecisionTreeClassifier(CombineDFs(' - 'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, ' - 'DecisionTreeClassifier__max_depth=8, DecisionTreeClassifier__min_samples_leaf=5,' - 'DecisionTreeClassifier__min_samples_split=5),input_matrix) ' - 'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8, ' - 'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)' - ) - - deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) - sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) - - # StackingEstimator under the transformer_list of FeatureUnion - assert getattr(getattr(sklearn_pipeline.steps[0][1].transformer_list[0][1], 'estimator'), 'random_state') == 42 - assert getattr(sklearn_pipeline.steps[1][1], 'random_state') == 42 - - def test_evaluated_individuals_(): """Assert that evaluated_individuals_ stores current pipelines and their CV scores.""" tpot_obj = TPOTClassifier( @@ -1402,7 +1346,6 @@ def test_evaluated_individuals_(): for pipeline_string in sorted(tpot_obj.evaluated_individuals_.keys()): deap_pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset) sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) operator_count = tpot_obj._operator_count(deap_pipeline) try: @@ -1450,7 +1393,6 @@ def pareto_eq(ind1, ind2): for deap_pipeline, fitness_score in zip(pop, fitness_scores): operator_count = tpot_obj._operator_count(deap_pipeline) sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) try: cv_scores = cross_val_score(sklearn_pipeline, training_features, training_target, cv=5, scoring='accuracy', verbose=0) @@ -1485,7 +1427,6 @@ def pareto_eq(ind1, ind2): for deap_pipeline, fitness_score in zip(pop, fitness_scores): operator_count = tpot_obj._operator_count(deap_pipeline) sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline) - tpot_obj._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) try: cv_scores = cross_val_score(sklearn_pipeline, training_features, training_target, cv=5, scoring='accuracy', verbose=0) diff --git a/tpot/base.py b/tpot/base.py index 865f0620..61ec6c39 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -63,7 +63,7 @@ from ._version import __version__ from .operator_utils import TPOTOperatorClassFactory, Operator, ARGType -from .export_utils import export_pipeline, expr_to_tree, generate_pipeline_code +from .export_utils import export_pipeline, expr_to_tree, generate_pipeline_code, set_param_recursive from .decorators import _pre_test from .builtins import CombineDFs, StackingEstimator @@ -121,7 +121,7 @@ def __init__(self, generations=100, population_size=100, offspring_size=None, ---------- generations: int or None, optional (default: 100) Number of iterations to the run pipeline optimization 
process. - It must be a positive number or None. If None, the parameter + It must be a positive number or None. If None, the parameter max_time_mins must be defined as the runtime limit. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate @@ -1065,7 +1065,7 @@ def _save_periodic_pipeline(self, gen): self.operators, self._pset, self._imputed, pareto_front_pipeline_score, self.random_state) - # dont export a pipeline you had + # dont export a pipeline you had if self._exported_pipeline_text.count(sklearn_pipeline_str): self._update_pbar(pbar_num=0, pbar_msg='Periodic pipeline was not saved, probably saved before...') else: @@ -1146,6 +1146,7 @@ def _impute_values(self, features): return self._fitted_imputer.transform(features) + def _check_dataset(self, features, target, sample_weight=None): """Check if a dataset has a valid feature set and labels. @@ -1232,35 +1233,11 @@ def _compile_to_sklearn(self, expr): sklearn_pipeline_str = generate_pipeline_code(expr_to_tree(expr, self._pset), self.operators) sklearn_pipeline = eval(sklearn_pipeline_str, self.operators_context) sklearn_pipeline.memory = self._memory + if self.random_state: + # Fix random state when the operator allows + set_param_recursive(sklearn_pipeline.steps, 'random_state', self.random_state) return sklearn_pipeline - def _set_param_recursive(self, pipeline_steps, parameter, value): - """Recursively iterate through all objects in the pipeline and set a given parameter. - - Parameters - ---------- - pipeline_steps: array-like - List of (str, obj) tuples from a scikit-learn pipeline or related object - parameter: str - The parameter to assign a value for in each pipeline object - value: any - The value to assign the parameter to in each pipeline object - Returns - ------- - None - - """ - for (_, obj) in pipeline_steps: - recursive_attrs = ['steps', 'transformer_list', 'estimators'] - for attr in recursive_attrs: - if hasattr(obj, attr): - self._set_param_recursive(getattr(obj, attr), parameter, value) - if hasattr(obj, 'estimator'): # nested estimator - est = getattr(obj, 'estimator') - if hasattr(est, parameter): - setattr(est, parameter, value) - if hasattr(obj, parameter): - setattr(obj, parameter, value) def _stop_by_max_time_mins(self): """Stop optimization process once maximum minutes have elapsed.""" @@ -1479,14 +1456,6 @@ def _preprocess_individuals(self, individuals): # Transform the tree expression into an sklearn pipeline sklearn_pipeline = self._toolbox.compile(expr=individual) - # Fix random state when the operator allows - self._set_param_recursive(sklearn_pipeline.steps, 'random_state', 42) - # Setting the seed is needed for XGBoost support because XGBoost currently stores - # both a seed and random_state, and they're not synced correctly. - # XGBoost will raise an exception if random_state != seed. - if 'XGB' in sklearn_pipeline_str: - self._set_param_recursive(sklearn_pipeline.steps, 'seed', 42) - # Count the number of pipeline operators as a measure of pipeline complexity operator_count = self._operator_count(individual) operator_counts[individual_str] = max(1, operator_count) diff --git a/tpot/export_utils.py b/tpot/export_utils.py index 0260a384..b2e373f1 100644 --- a/tpot/export_utils.py +++ b/tpot/export_utils.py @@ -69,7 +69,7 @@ def export_pipeline(exported_pipeline, impute: bool (False): If impute = True, then adda a imputation step. random_state: integer - Random seed in train_test_split function. 
+ Random seed in train_test_split function and exported pipeline. data_file_path: string (default: '') By default, the path of input dataset is 'PATH/TO/DATA/FILE' by default. If data_file_path is another string, the path will be replaced. @@ -84,9 +84,9 @@ def export_pipeline(exported_pipeline, pipeline_tree = expr_to_tree(exported_pipeline, pset) # Have the exported code import all of the necessary modules and functions - pipeline_text = generate_import_code(exported_pipeline, operators, impute) + pipeline_text = generate_import_code(exported_pipeline, operators, impute, random_state) - pipeline_code = pipeline_code_wrapper(generate_export_pipeline_code(pipeline_tree, operators)) + pipeline_code = pipeline_code_wrapper(generate_export_pipeline_code(pipeline_tree, operators), random_state) if pipeline_code.count("FunctionTransformer(copy)"): pipeline_text += """from sklearn.preprocessing import FunctionTransformer @@ -165,7 +165,7 @@ def prim_to_list(prim, args): return tree -def generate_import_code(pipeline, operators, impute=False): +def generate_import_code(pipeline, operators, impute=False, random_state=None): """Generate all library import calls for use in TPOT.export(). Parameters @@ -176,6 +176,8 @@ def generate_import_code(pipeline, operators, impute=False): List of operator class from operator library impute : bool Whether to impute new values in the feature set. + random_state: integer or None + Random seed in train_test_split function and exported pipeline. Returns ------- @@ -220,6 +222,9 @@ def merge_imports(old_dict, new_dict): except ImportError: from sklearn.preprocessing import Imputer """ + if random_state is not None: + pipeline_text += """from tpot.export_utils import set_param_recursive +""" return pipeline_text @@ -256,24 +261,38 @@ def _starting_imports(operators, operators_used): } -def pipeline_code_wrapper(pipeline_code): +def pipeline_code_wrapper(pipeline_code, random_state=None): """Generate code specific to the execution of the sklearn pipeline. Parameters ---------- pipeline_code: str Code that defines the final sklearn pipeline + random_state: integer or None + Random seed in train_test_split function and exported pipeline. Returns ------- - Source code for the sklearn pipeline and calls to fit and predict + exported_code: str + Source code for the sklearn pipeline and calls to fit and predict """ - return """exported_pipeline = {} + if random_state is None: + exported_code = """exported_pipeline = {} exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) """.format(pipeline_code) + else: + exported_code = """exported_pipeline = {} +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', {}) + +exported_pipeline.fit(training_features, training_target) +results = exported_pipeline.predict(testing_features) +""".format(pipeline_code, random_state) + + return exported_code def generate_pipeline_code(pipeline_tree, operators): @@ -390,3 +409,32 @@ def _make_branch(branch): return "make_union(\n{},\n{}\n)".\ format(_indent(_make_branch(left), 4), _indent(_make_branch(right), 4)) + + +def set_param_recursive(pipeline_steps, parameter, value): + """Recursively iterate through all objects in the pipeline and set a given parameter. 
+ + Parameters + ---------- + pipeline_steps: array-like + List of (str, obj) tuples from a scikit-learn pipeline or related object + parameter: str + The parameter to assign a value for in each pipeline object + value: any + The value to assign the parameter to in each pipeline object + Returns + ------- + None + + """ + for (_, obj) in pipeline_steps: + recursive_attrs = ['steps', 'transformer_list', 'estimators'] + for attr in recursive_attrs: + if hasattr(obj, attr): + set_param_recursive(getattr(obj, attr), parameter, value) + if hasattr(obj, 'estimator'): # nested estimator + est = getattr(obj, 'estimator') + if hasattr(est, parameter): + setattr(est, parameter, value) + if hasattr(obj, parameter): + setattr(obj, parameter, value) From 1bf37bdda630fb5d6f3a77653e12110d526558ca Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Mon, 4 Nov 2019 15:55:51 -0500 Subject: [PATCH 26/44] update examples for new exported codes format #933 and correct digits dataset names #943 --- README.md | 62 +++++++----- docs/api/index.html | 2 +- docs/examples/index.html | 4 +- docs/search/search_index.json | 2 +- docs/using/index.html | 6 +- docs_sources/api.md | 2 +- docs_sources/examples.md | 98 +++++++++++-------- docs_sources/using.md | 6 +- tests/export_tests.py | 44 ++++----- tests/tpot_tests.py | 8 +- tpot/base.py | 3 +- tpot/export_utils.py | 4 +- tutorials/{MNIST.ipynb => Digits.ipynb} | 4 +- .../MAGIC Gamma Telescope.ipynb | 4 +- .../tpot_MAGIC_Gamma_Telescope_pipeline.py | 4 +- .../Portuguese Bank Marketing Strategy.ipynb | 4 +- .../tpot_marketing_pipeline.py | 4 +- 17 files changed, 150 insertions(+), 111 deletions(-) rename tutorials/{MNIST.ipynb => Digits.ipynb} (98%) diff --git a/README.md b/README.md index 911b64c8..40251369 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Click on the corresponding links to find more information on TPOT usage in the d ### Classification -Below is a minimal working example with the practice MNIST data set. +Below is a minimal working example with the the optical recognition of handwritten digits dataset. 
```python from tpot import TPOTClassifier @@ -64,32 +64,43 @@ from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, - train_size=0.75, test_size=0.25) + train_size=0.75, test_size=0.25, random_state=42) -tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2) +tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` -Running this code should discover a pipeline that achieves about 98% testing accuracy, and the corresponding Python code should be exported to the `tpot_mnist_pipeline.py` file and look similar to the following: +Running this code should discover a pipeline that achieves about 98% testing accuracy, and the corresponding Python code should be exported to the `tpot_digits_pipeline.py` file and look similar to the following: ```python import numpy as np import pandas as pd +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier +from sklearn.pipeline import make_pipeline, make_union +from sklearn.preprocessing import PolynomialFeatures +from tpot.builtins import StackingEstimator +from tpot.export_utils import set_param_recursive # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['target'].values, random_state=None) - - -exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights="distance") - -exported_pipeline.fit(training_features, training_classes) + train_test_split(features, tpot_data['target'], random_state=42) + +# Average CV score on the training set was: 0.9799428471757372 +exported_pipeline = make_pipeline( + PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), + StackingEstimator(estimator=LogisticRegression(C=0.1, dual=False, penalty="l1")), + RandomForestClassifier(bootstrap=True, criterion="entropy", max_features=0.35000000000000003, min_samples_leaf=20, min_samples_split=19, n_estimators=100) +) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) + +exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) ``` @@ -104,9 +115,9 @@ from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, - train_size=0.75, test_size=0.25) + train_size=0.75, test_size=0.25, random_state=42) -tpot = TPOTRegressor(generations=5, population_size=20, verbosity=2) +tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2, random_state=42) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') @@ -117,20 +128,27 @@ which should result in a pipeline that achieves about 12.77 mean squared error ( ```python import numpy as np import pandas as pd -from sklearn.ensemble import GradientBoostingRegressor +from sklearn.ensemble import 
ExtraTreesRegressor from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures +from tpot.export_utils import set_param_recursive # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=42) -exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss="ls", - max_features=0.9, min_samples_leaf=5, - min_samples_split=6) +# Average CV score on the training set was: -10.812040755234403 +exported_pipeline = make_pipeline( + PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), + ExtraTreesRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=2, min_samples_split=3, n_estimators=100) +) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) -exported_pipeline.fit(training_features, training_classes) +exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) ``` diff --git a/docs/api/index.html b/docs/api/index.html index 58c5f027..1119dfc3 100644 --- a/docs/api/index.html +++ b/docs/api/index.html @@ -406,7 +406,7 @@ Classification
tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py')Functions
diff --git a/docs/examples/index.html b/docs/examples/index.html index 61db571d..5ceffe11 100644 --- a/docs/examples/index.html +++ b/docs/examples/index.html @@ -272,10 +272,10 @@MNIST digit recognition
tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') -Running this code should discover a pipeline (exported as
+tpot_mnist_pipeline.py
) that achieves about 98% test accuracy:Running this code should discover a pipeline (exported as
tpot_digits_pipeline.py
) that achieves about 98% test accuracy:import numpy as np from sklearn.model_selection import train_test_split diff --git a/docs/search/search_index.json b/docs/search/search_index.json index 2d44b7d7..b0e5bcf8 100644 --- a/docs/search/search_index.json +++ b/docs/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Consider TPOT your Data Science Assistant . TPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming. TPOT will automate the most tedious part of machine learning by intelligently exploring thousands of possible pipelines to find the best one for your data. An example machine learning pipeline Once TPOT is finished searching (or you get tired of waiting), it provides you with the Python code for the best pipeline it found so you can tinker with the pipeline from there. An example TPOT pipeline TPOT is built on top of scikit-learn, so all of the code it generates should look familiar... if you're familiar with scikit-learn, anyway. TPOT is still under active development and we encourage you to check back on this repository regularly for updates.","title":"Home"},{"location":"api/","text":"Classification class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. 
mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. 
config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. 
periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. 
This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything Regression class tpot. TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. 
The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to run the pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations × offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to "breed" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with "error" or "loss" in the function name is meant to be minimized, whereas any other functions will be maximized. Passing a metric function in this way was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines.
Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. 
Steps in the template are delimited by "-", e.g. "SelectPercentile-Transformer-Regressor". By default, template is None and TPOT generates tree-based pipelines randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, the pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. More details about memory caching can be found in the scikit-learn documentation . Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimizations. This avoids re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save the pipelines on the Pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: sudden death before TPOT could save an optimized pipeline, tracking its progress, or grabbing pipelines while it is still optimizing. early_stop : integer, optional (default: None) How many generations TPOT checks for improvement in the optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3.
evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in the pipeline, cross-validation score for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect TPOT's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'.
Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything
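As a worked illustration of the scoring, max_eval_time_mins and export options documented above, the following hedged sketch fits a TPOTRegressor with a custom scorer built via scikit-learn's make_scorer. The metric choice (mean absolute error), the time limits and the output file name are illustrative assumptions rather than defaults.

from tpot import TPOTRegressor
from sklearn.datasets import load_boston
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.model_selection import train_test_split

# A custom scorer with the scorer(estimator, X, y) signature described above;
# greater_is_better=False negates the metric so that TPOT effectively minimizes MAE.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

housing = load_boston()
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target,
                                                    train_size=0.75, test_size=0.25)

tpot = TPOTRegressor(
    generations=5,
    population_size=50,
    scoring=mae_scorer,    # custom scorer instead of the default 'neg_mean_squared_error'
    max_eval_time_mins=2,  # skip any single pipeline that takes longer than 2 minutes to evaluate
    verbosity=2,
)
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))          # held-out score using the same scorer
print(len(tpot.evaluated_individuals_))    # number of pipelines TPOT evaluated
tpot.export('tpot_housing_pipeline.py')    # illustrative output file name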
Citing

If you use TPOT in a scientific publication, please consider citing at least one of the following papers: Randal S. Olson, Ryan J. Urbanowicz, Peter C. Andrews, Nicole A. Lavender, La Creis Kidd, and Jason H. Moore (2016). Automating biomedical data science through tree-based pipeline optimization . Applications of Evolutionary Computation , pages 123-137. BibTeX entry: @inbook{Olson2016EvoBio, author={Olson, Randal S. and Urbanowicz, Ryan J. and Andrews, Peter C. and Lavender, Nicole A. and Kidd, La Creis and Moore, Jason H.}, editor={Squillero, Giovanni and Burelli, Paolo}, chapter={Automating Biomedical Data Science Through Tree-Based Pipeline Optimization}, title={Applications of Evolutionary Computation: 19th European Conference, EvoApplications 2016, Porto, Portugal, March 30 -- April 1, 2016, Proceedings, Part I}, year={2016}, publisher={Springer International Publishing}, pages={123--137}, isbn={978-3-319-31204-0}, doi={10.1007/978-3-319-31204-0_9}, url={http://dx.doi.org/10.1007/978-3-319-31204-0_9} } Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science Randal S. Olson, Nathan Bartley, Ryan J. Urbanowicz, and Jason H. Moore (2016). Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science . Proceedings of GECCO 2016 , pages 485-492. BibTeX entry: @inproceedings{OlsonGECCO2016, author = {Olson, Randal S. and Bartley, Nathan and Urbanowicz, Ryan J. and Moore, Jason H.}, title = {Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science}, booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference 2016}, series = {GECCO '16}, year = {2016}, isbn = {978-1-4503-4206-3}, location = {Denver, Colorado, USA}, pages = {485--492}, numpages = {8}, url = {http://doi.acm.org/10.1145/2908812.2908918}, doi = {10.1145/2908812.2908918}, acmid = {2908918}, publisher = {ACM}, address = {New York, NY, USA}, } Alternatively, you can cite the repository directly with the following DOI:

Contributing

We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT, please file a new issue so we can discuss it. Project layout The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code.
In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch. How to contribute The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.) Before submitting your pull request Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. 
If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh After submitting your pull request After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.
Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.)","title":"How to contribute"},{"location":"contributing/#before-submitting-your-pull-request","text":"Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh","title":"Before submitting your pull request"},{"location":"contributing/#after-submitting-your-pull-request","text":"After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"After submitting your pull request"},{"location":"examples/","text":"Overview The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. 
Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here. Iris flower classification The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) MNIST digit recognition Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Running this code should discover a pipeline (exported as tpot_mnist_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Boston housing prices modeling The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Titanic survival analysis To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT. Portuguese Bank Marketing The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here . MAGIC Gamma Telescope The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Examples"},{"location":"examples/#overview","text":"The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. 
Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here.","title":"Overview"},{"location":"examples/#iris-flower-classification","text":"The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Iris flower classification"},{"location":"examples/#mnist-digit-recognition","text":"Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Running this code should discover a pipeline (exported as tpot_mnist_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"MNIST digit recognition"},{"location":"examples/#boston-housing-prices-modeling","text":"The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Boston housing prices modeling"},{"location":"examples/#titanic-survival-analysis","text":"To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . 
This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT.","title":"Titanic survival analysis"},{"location":"examples/#portuguese-bank-marketing","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Portuguese Bank Marketing"},{"location":"examples/#magic-gamma-telescope","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"MAGIC Gamma Telescope"},{"location":"installing/","text":"TPOT is built on top of several existing Python libraries, including: NumPy SciPy scikit-learn DEAP update_checker tqdm stopit pandas joblib Most of the necessary Python packages can be installed via the Anaconda Python distribution , which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. NumPy, SciPy, scikit-learn, pandas and joblib can be installed in Anaconda via the command: conda install numpy scipy scikit-learn pandas joblib DEAP, update_checker, tqdm and stopit can be installed with pip via the command: pip install deap update_checker tqdm stopit For Windows users , the pywin32 module is required if Python is NOT installed via the Anaconda Python distribution and can be installed with pip for Python version <=3.3 or conda (e.g. miniconda) for any Python version: conda install pywin32 Optionally , you can install XGBoost if you would like TPOT to use the eXtreme Gradient Boosting models. XGBoost is entirely optional, and TPOT will still function normally without XGBoost if you do not have it installed. Windows users: pip installation may not work on some Windows environments, and it may cause unexpected errors. pip install xgboost If you have issues installing XGBoost, check the XGBoost installation documentation . If you plan to use Dask for parallel training, make sure to install dask[delayed] and dask-ml . pip install dask[delayed] dask-ml If you plan to use the TPOT-MDR configuration , make sure to install scikit-mdr and scikit-rebate : pip install scikit-mdr skrebate Finally, to install TPOT itself, run the following command: pip install tpot Please file a new issue if you run into installation problems.","title":"Installation"},{"location":"related/","text":"Other Automated Machine Learning (AutoML) tools and related projects: Name Language License Description Auto-WEKA Java GPL-v3 Automated model selection and hyper-parameter tuning for Weka models. auto-sklearn Python BSD-3-Clause An automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator. auto_ml Python MIT Automated machine learning for analytics & production. Supports manual feature type declarations. H2O AutoML Java with Python, Scala & R APIs and web GUI Apache 2.0 Automated: data prep, hyperparameter tuning, random grid search and stacked ensembles in a distributed ML platform. devol Python MIT Automated deep neural network design via genetic programming. MLBox Python BSD-3-Clause Accurate hyper-parameter optimization in high-dimensional space with support for distributed computing. Recipe C GPL-v3 Machine-learning pipeline optimization through genetic programming. Uses grammars to define pipeline structure. Xcessiv Python Apache 2.0 A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.
GAMA Python Apache 2.0 Machine-learning pipeline optimization through asynchronous evaluation based genetic programming.","title":"Related"},{"location":"releases/","text":"Version 0.9 TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. Refined the TPOT pipeline mutation operator. Version 0.8 TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code. Version 0.7 TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. 
The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance. Version 0.6 TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score. Version 0.5 Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes. Version 0.4 In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. 
Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions Version 0.3 We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over. Version 0.2 TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline. Version 0.1 First public release of TPOT. Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Release Notes"},{"location":"releases/#version-09","text":"TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. 
Refined the TPOT pipeline mutation operator.","title":"Version 0.9"},{"location":"releases/#version-08","text":"TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code.","title":"Version 0.8"},{"location":"releases/#version-07","text":"TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance.","title":"Version 0.7"},{"location":"releases/#version-06","text":"TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. 
Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score.","title":"Version 0.6"},{"location":"releases/#version-05","text":"Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes.","title":"Version 0.5"},{"location":"releases/#version-04","text":"In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions","title":"Version 0.4"},{"location":"releases/#version-03","text":"We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over.","title":"Version 0.3"},{"location":"releases/#version-02","text":"TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline.","title":"Version 0.2"},{"location":"releases/#version-01","text":"First public release of TPOT. 
Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Version 0.1"},{"location":"support/","text":"TPOT was developed in the Computational Genetics Lab at the University of Pennsylvania with funding from the NIH under grant R01 AI117694. We are incredibly grateful for the support of the NIH and the University of Pennsylvania during the development of this project. The TPOT logo was designed by Todd Newmuis, who generously donated his time to the project.","title":"Support"},{"location":"using/","text":"What to expect from AutoML software Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT. AutoML algorithms aren't intended to run for only a few minutes Of course, you can run TPOT for only a few minutes and it will find a reasonably good pipeline for your dataset. However, if you don't run TPOT for long enough, it may not find the best possible pipeline for your dataset. It may even not find any suitable pipeline at all, in which case a RuntimeError('A pipeline has not yet been optimized. Please call fit() first.') will be raised. Often it is worthwhile to run multiple instances of TPOT in parallel for a long time (hours to days) to allow TPOT to thoroughly search the pipeline space for your dataset. AutoML algorithms can take a long time to finish their search AutoML algorithms aren't as simple as fitting one model on the dataset; they are considering multiple machine learning algorithms (random forests, linear models, SVMs, etc.) in a pipeline with multiple preprocessing steps (missing value imputation, scaling, PCA, feature selection, etc.), the hyperparameters for all of the models and preprocessing steps, as well as multiple ways to ensemble or stack the algorithms within the pipeline. As such, TPOT will take a while to run on larger datasets, but it's important to realize why. With the default TPOT settings (100 generations with 100 population size), TPOT will evaluate 10,000 pipeline configurations before finishing. To put this number into context, think about a grid search of 10,000 hyperparameter combinations for a machine learning algorithm and how long that grid search will take. That is 10,000 model configurations to evaluate with 10-fold cross-validation, which means that roughly 100,000 models are fit and evaluated on the training data in one grid search. That's a time-consuming procedure, even for simpler models like decision trees. Typical TPOT runs will take hours to days to finish (unless it's a small dataset), but you can always interrupt the run partway through and see the best results so far. TPOT also provides a warm_start parameter that lets you restart a TPOT run from where it left off. AutoML algorithms can recommend different solutions for the same dataset If you're working with a reasonably complex dataset or run TPOT for a short amount of time, different TPOT runs may result in different pipeline recommendations. TPOT's optimization algorithm is stochastic in nature, which means that it uses randomness (in part) to search the possible pipeline space. When two TPOT runs recommend different pipelines, this means that the TPOT runs didn't converge due to lack of time or that multiple pipelines perform more-or-less the same on your dataset. 
This is actually an advantage over fixed grid search techniques: TPOT is meant to be an assistant that gives you ideas on how to solve a particular machine learning problem by exploring pipeline configurations that you might have never considered, then leaves the fine-tuning to more constrained parameter tuning techniques such as grid search. TPOT with code We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets. TPOT on the command line To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. 
-o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. 
Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. 
--no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit. Scoring functions TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module. Built-in TPOT configurations TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. 
To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Customizing TPOT's operators and parameters Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. 
For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers. Template option in TPOT The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of how to use the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for TPOTClassifier 's template but Regressor for TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes those subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'. FeatureSetSelector in TPOT FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on prior expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format.
In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in the set, and the 3rd column is a list of feature names (if the input X is a pandas.DataFrame) or indexes (if the input X is a numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT (a sketch of the subset list layout appears further below). Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of index of subset in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y) Pipeline caching in TPOT With the memory parameter, pipelines can cache the results of each transformer after fitting them. This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to those of another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore. Crash/freeze issue with n_jobs > 1 under OSX or Linux Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. However, it may crash/freeze with n_jobs > 1 under OSX or Linux, as scikit-learn does, especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .
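As promised above, here is a hedged sketch of what a FeatureSetSelector subset list might look like; the column headers, feature set names, and gene identifiers are hypothetical and only illustrate the three-column layout described earlier (set name, feature count, \";\"-delimited feature names or indexes):

import pandas as pd

# Hypothetical subset list for FeatureSetSelector.
# Column 1: feature set name, column 2: number of features in the set,
# column 3: feature names (or column indexes) delimited by ';'.
subset_list = pd.DataFrame({
    'Subset': ['my_gene_set_1', 'my_gene_set_2'],   # hypothetical set names
    'Size': [3, 2],
    'Features': ['TP53;CASP3;BAX', 'TNF;IL6'],      # hypothetical feature names
})
subset_list.to_csv('my_subset_list.csv', index=False)

# The file path (or URL) can then be supplied to the 'subset_list' entry of the
# FeatureSetSelector configuration shown in the example above.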
Parallel Training with Dask For large problems, or when working in a Jupyter notebook, we highly recommend that you distribute the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10* n_jobs if it is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend. You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('scheduler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Using TPOT"},{"location":"using/#what-to-expect-from-automl-software","text":"Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT.","title":"What to expect from AutoML software"},{"location":"using/#tpot-with-code","text":"We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation. Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file.
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets.","title":"TPOT with code"},{"location":"using/#tpot-on-the-command-line","text":"To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. -o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. 
TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. 
See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. --no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit.","title":"TPOT on the command line"},{"location":"using/#scoring-functions","text":"TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. 
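As a concrete illustration of the greater_is_better note in the comment above, the sketch below builds a scorer from a hypothetical error metric (my_custom_error) that should be minimized; it assumes the same digits data split as the example above:

from tpot import TPOTClassifier
from sklearn.metrics import make_scorer
import numpy as np

# A hypothetical error metric: the fraction of misclassified samples.
def my_custom_error(y_true, y_pred):
    return float(np.sum(y_pred != y_true)) / len(y_true)

# greater_is_better=False tells scikit-learn (and therefore TPOT) that lower
# values of this metric are better, so the score is negated internally and
# TPOT effectively minimizes the error during pipeline evaluation.
my_custom_error_scorer = make_scorer(my_custom_error, greater_is_better=False)

tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      scoring=my_custom_error_scorer)
tpot.fit(X_train, y_train)   # X_train, y_train from the digits split above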
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module.","title":"Scoring functions"},{"location":"using/#built-in-tpot-configurations","text":"TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). 
For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py')","title":"Built-in TPOT configurations"},{"location":"using/#customizing-tpots-operators-and-parameters","text":"Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_mnist_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. 
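If you prefer to start from a built-in configuration rather than write a dictionary from scratch, you can also import one of TPOT's default configuration dictionaries and modify it before passing it to the config_dict parameter, as the FeatureSetSelector example elsewhere in this documentation does with classifier_config_dict. A minimal sketch, where the specific operators touched below are only illustrations:

from tpot import TPOTClassifier
from tpot.config import classifier_config_dict

# Start from a copy of TPOT's default classification configuration...
custom_config = dict(classifier_config_dict)

# ...drop an operator you do not want TPOT to consider
# (the operator path shown here is purely an illustration)...
custom_config.pop('sklearn.neighbors.KNeighborsClassifier', None)

# ...and/or narrow the search space for another operator.
custom_config['sklearn.naive_bayes.BernoulliNB'] = {
    'alpha': [1e-2, 1e-1, 1.],
    'fit_prior': [True, False],
}

tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      config_dict=custom_config)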
Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers.","title":"Customizing TPOT's operators and parameters"},{"location":"using/#template-option-in-tpot","text":"Template option provides a way to specify a desired structure for machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. Current implementation only supports linear pipelines. Below is a simple example to use template option. The pipelines generated/evaluated in TPOT will follow this structure: 1st step is a feature selector (a subclass of SelectorMixin ), 2nd step is a feature transformer (a subclass of TransformerMixin ) and 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for TPOTClassifier 's template but Regressor for TPOTRegressor . Note: although SelectorMixin is subclass of TransformerMixin in scikit-leawrn, but Transformer in this option excludes those subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is prefered to used in the 1st step of pipeline, the template can be defined like 'SelectPercentile-Transformer-Classifier'.","title":"Template option in TPOT"},{"location":"using/#featuresetselector-in-tpot","text":"FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on priori export knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets Molecular Signatures Database ( MSigDB ) in the 1st step of pipeline via template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format. In this csv file, there are only three columns: 1st column is feature set names, 2nd column is the total number of features in one set and 3rd column is a list of feature names (if input X is pandas.DataFrame) or indexes (if input X is numpy.ndarray) delimited by \";\". Below is a example how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of index of subset in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y)","title":"FeatureSetSelector in TPOT"},{"location":"using/#pipeline-caching-in-tpot","text":"With the memory parameter, pipelines can cache the results of each transformer after fitting them. 
This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need it anymore.","title":"Pipeline caching in TPOT"},{"location":"using/#crashfreeze-issue-with-n_jobs-1-under-osx-or-linux","text":"Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. But it may crash/freeze with n_jobs > 1 under OSX or Linux as scikit-learn does , especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following codes into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .","title":"Crash/freeze issue with n_jobs > 1 under OSX or Linux"},{"location":"using/#parallel-training-with-dask","text":"For large problems or working on Jupyter notebook, we highly recommend that you can distribute the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on the your Dask cluster. If n_jobs is specified, then it will control the chunk size (10* n_jobs if it is less then offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend. 
You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('schedueler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Parallel Training with Dask"}]} \ No newline at end of file +{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Consider TPOT your Data Science Assistant . TPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming. TPOT will automate the most tedious part of machine learning by intelligently exploring thousands of possible pipelines to find the best one for your data. An example machine learning pipeline Once TPOT is finished searching (or you get tired of waiting), it provides you with the Python code for the best pipeline it found so you can tinker with the pipeline from there. An example TPOT pipeline TPOT is built on top of scikit-learn, so all of the code it generates should look familiar... if you're familiar with scikit-learn, anyway. TPOT is still under active development and we encourage you to check back on this repository regularly for updates.","title":"Home"},{"location":"api/","text":"Classification class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. 
mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. 
random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. 
This avoids re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save the pipelines on the Pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: sudden death before TPOT could save an optimized pipeline, tracking its progress, or grabbing pipelines while it's still optimizing. early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in the optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data.
Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything Regression class tpot. 
TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . 
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. 
If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. 
pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold , as sketched below.
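A rough sketch, using synthetic placeholder data, of passing sample_weight and groups to fit() together with a group-aware cross-validation splitter; the sample sizes, group labels, and weights are arbitrary choices made only for illustration.

import numpy as np
from sklearn.model_selection import GroupKFold
from tpot import TPOTRegressor

# Synthetic placeholder data: 100 samples, 5 features, 4 groups of 25 samples each.
X = np.random.rand(100, 5)
y = np.random.rand(100)
groups = np.repeat(np.arange(4), 25)  # one group label per sample
weights = np.ones(100)                # uniform per-sample weights

# groups is only meaningful with a group-aware CV splitter such as GroupKFold.
tpot = TPOTRegressor(generations=2, population_size=10,
                     cv=GroupKFold(n_splits=4), verbosity=2)
tpot.fit(X, y, sample_weight=weights, groups=groups)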
Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"TPOT API"},{"location":"api/#classification","text":"class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to run the pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0.
We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. 
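Before the config_dict option below, a small, hypothetical sketch of the custom scoring callables described above; it relies on sklearn's make_scorer to produce the scorer(estimator, X, y) signature mentioned in this section, and the chosen metric is an arbitrary example.

from sklearn.metrics import fbeta_score, make_scorer
from tpot import TPOTClassifier

# make_scorer wraps a metric function into the scorer(estimator, X, y) form.
f2_scorer = make_scorer(fbeta_score, beta=2)

tpot = TPOTClassifier(generations=5, population_size=20,
                      scoring=f2_scorer, verbosity=2)
# A built-in metric can also be selected by name, e.g. scoring='balanced_accuracy'.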
config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. 
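As a rough illustration of the custom config_dict described above, the sketch below restricts the search to two classifiers; the dictionary layout (operator import paths mapped to hyperparameter ranges) follows the custom-configuration docs, but the specific operators and value ranges are arbitrary examples rather than a recommended search space.

from tpot import TPOTClassifier

# Keys are operator import paths; values map hyperparameter names to candidate values.
custom_config = {
    'sklearn.naive_bayes.GaussianNB': {},
    'sklearn.tree.DecisionTreeClassifier': {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 11),
        'min_samples_split': range(2, 21),
    },
}

tpot = TPOTClassifier(generations=5, population_size=20,
                      config_dict=custom_config, verbosity=2)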
periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. 
This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Classification"},{"location":"api/#regression","text":"class tpot. 
TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . 
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. 
If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. 
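Before the remaining attributes, a hedged sketch of the use_dask option described above; it assumes the optional dask and dask-ml packages mentioned in the installation notes are available, and the local Client settings are illustrative only, not requirements.

from dask.distributed import Client
from tpot import TPOTRegressor

# A local Dask cluster; in practice this could point at a remote scheduler instead.
client = Client(n_workers=4, threads_per_worker=1)

tpot = TPOTRegressor(generations=5, population_size=20,
                     use_dask=True, n_jobs=-1, verbosity=2)
# With use_dask=True, repeated fits on identical data splits are avoided and the
# Dask dashboard (client.dashboard_link) provides per-pipeline diagnostics.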
pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold .
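Once fit() has returned (see below), the attributes documented earlier in this section can be inspected; the helper below is only a sketch and assumes a TPOT estimator that has already been fitted.

def summarize_tpot_run(tpot):
    """Print a short summary of a fitted TPOT estimator (sketch only)."""
    print(tpot.fitted_pipeline_)  # best pipeline, refit on the full training data
    # Every evaluated pipeline, keyed by its string representation.
    for pipeline_str, stats in list(tpot.evaluated_individuals_.items())[:5]:
        print(pipeline_str, stats)
    # Pareto-front pipelines are only collected when verbosity=3.
    if hasattr(tpot, 'pareto_front_fitted_pipelines_'):
        print(len(tpot.pareto_front_fitted_pipelines_), 'pipelines on the Pareto front')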
Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Regression"},{"location":"citing/","text":"If you use TPOT in a scientific publication, please consider citing at least one of the following papers: Randal S. Olson, Ryan J. Urbanowicz, Peter C. Andrews, Nicole A. Lavender, La Creis Kidd, and Jason H. Moore (2016). Automating biomedical data science through tree-based pipeline optimization . Applications of Evolutionary Computation , pages 123-137. BibTeX entry: @inbook{Olson2016EvoBio, author={Olson, Randal S. and Urbanowicz, Ryan J. and Andrews, Peter C. and Lavender, Nicole A. and Kidd, La Creis and Moore, Jason H.}, editor={Squillero, Giovanni and Burelli, Paolo}, chapter={Automating Biomedical Data Science Through Tree-Based Pipeline Optimization}, title={Applications of Evolutionary Computation: 19th European Conference, EvoApplications 2016, Porto, Portugal, March 30 -- April 1, 2016, Proceedings, Part I}, year={2016}, publisher={Springer International Publishing}, pages={123--137}, isbn={978-3-319-31204-0}, doi={10.1007/978-3-319-31204-0_9}, url={http://dx.doi.org/10.1007/978-3-319-31204-0_9} } Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science Randal S. Olson, Nathan Bartley, Ryan J. Urbanowicz, and Jason H. Moore (2016). Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science . Proceedings of GECCO 2016 , pages 485-492. BibTeX entry: @inproceedings{OlsonGECCO2016, author = {Olson, Randal S. and Bartley, Nathan and Urbanowicz, Ryan J. and Moore, Jason H.}, title = {Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science}, booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference 2016}, series = {GECCO '16}, year = {2016}, isbn = {978-1-4503-4206-3}, location = {Denver, Colorado, USA}, pages = {485--492}, numpages = {8}, url = {http://doi.acm.org/10.1145/2908812.2908918}, doi = {10.1145/2908812.2908918}, acmid = {2908918}, publisher = {ACM}, address = {New York, NY, USA}, } Alternatively, you can cite the repository directly with the following DOI:","title":"Citing"},{"location":"contributing/","text":"We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT, please file a new issue so we can discuss it.
Project layout The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch. How to contribute The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.) Before submitting your pull request Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. 
Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh After submitting your pull request After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"Contributing"},{"location":"contributing/#project-layout","text":"The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch.","title":"Project layout"},{"location":"contributing/#how-to-contribute","text":"The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. 
Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.)","title":"How to contribute"},{"location":"contributing/#before-submitting-your-pull-request","text":"Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh","title":"Before submitting your pull request"},{"location":"contributing/#after-submitting-your-pull-request","text":"After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"After submitting your pull request"},{"location":"examples/","text":"Overview The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. 
Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here. Iris flower classification The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) MNIST digit recognition Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Boston housing prices modeling The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Titanic survival analysis To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT. Portuguese Bank Marketing The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here . 
MAGIC Gamma Telescope The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Examples"},{"location":"examples/#overview","text":"The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here.","title":"Overview"},{"location":"examples/#iris-flower-classification","text":"The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Iris flower classification"},{"location":"examples/#mnist-digit-recognition","text":"Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"MNIST digit recognition"},{"location":"examples/#boston-housing-prices-modeling","text":"The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Boston housing prices modeling"},{"location":"examples/#titanic-survival-analysis","text":"To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . 
This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT.","title":"Titanic survival analysis"},{"location":"examples/#portuguese-bank-marketing","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Portuguese Bank Marketing"},{"location":"examples/#magic-gamma-telescope","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"MAGIC Gamma Telescope"},{"location":"installing/","text":"TPOT is built on top of several existing Python libraries, including: NumPy SciPy scikit-learn DEAP update_checker tqdm stopit pandas joblib Most of the necessary Python packages can be installed via the Anaconda Python distribution , which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. NumPy, SciPy, scikit-learn, pandas and joblib can be installed in Anaconda via the command: conda install numpy scipy scikit-learn pandas joblib DEAP, update_checker, tqdm and stopit can be installed with pip via the command: pip install deap update_checker tqdm stopit For Windows users, the pywin32 module is required if Python is NOT installed via the Anaconda Python distribution and can be installed with pip for Python version <=3.3 or conda (e.g. miniconda) for any Python version: conda install pywin32 Optionally , you can install XGBoost if you would like TPOT to use the eXtreme Gradient Boosting models. XGBoost is entirely optional, and TPOT will still function normally without XGBoost if you do not have it installed. Windows users: pip installation may not work on some Windows environments, and it may cause unexpected errors. pip install xgboost If you have issues installing XGBoost, check the XGBoost installation documentation . If you plan to use Dask for parallel training, make sure to install dask[delayed] and dask-ml : pip install dask[delayed] dask-ml If you plan to use the TPOT-MDR configuration , make sure to install scikit-mdr and scikit-rebate : pip install scikit-mdr skrebate Finally, to install TPOT itself, run the following command: pip install tpot Please file a new issue if you run into installation problems.","title":"Installation"},{"location":"related/","text":"Other Automated Machine Learning (AutoML) tools and related projects: Name Language License Description Auto-WEKA Java GPL-v3 Automated model selection and hyper-parameter tuning for Weka models. auto-sklearn Python BSD-3-Clause An automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator. auto_ml Python MIT Automated machine learning for analytics & production. Supports manual feature type declarations. H2O AutoML Java with Python, Scala & R APIs and web GUI Apache 2.0 Automated: data prep, hyperparameter tuning, random grid search and stacked ensembles in a distributed ML platform. devol Python MIT Automated deep neural network design via genetic programming. MLBox Python BSD-3-Clause Accurate hyper-parameter optimization in high-dimensional space with support for distributed computing. Recipe C GPL-v3 Machine-learning pipeline optimization through genetic programming. Uses grammars to define pipeline structure. Xcessiv Python Apache 2.0 A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.
GAMA Python Apache 2.0 Machine-learning pipeline optimization through asynchronous evaluation based genetic programming.","title":"Related"},{"location":"releases/","text":"Version 0.9 TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. Refined the TPOT pipeline mutation operator. Version 0.8 TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code. Version 0.7 TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. 
The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance. Version 0.6 TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score. Version 0.5 Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes. Version 0.4 In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. 
Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions Version 0.3 We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over. Version 0.2 TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline. Version 0.1 First public release of TPOT. Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Release Notes"},{"location":"releases/#version-09","text":"TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. 
Refined the TPOT pipeline mutation operator.","title":"Version 0.9"},{"location":"releases/#version-08","text":"TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code.","title":"Version 0.8"},{"location":"releases/#version-07","text":"TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance.","title":"Version 0.7"},{"location":"releases/#version-06","text":"TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. 
Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score.","title":"Version 0.6"},{"location":"releases/#version-05","text":"Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes.","title":"Version 0.5"},{"location":"releases/#version-04","text":"In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions","title":"Version 0.4"},{"location":"releases/#version-03","text":"We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over.","title":"Version 0.3"},{"location":"releases/#version-02","text":"TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline.","title":"Version 0.2"},{"location":"releases/#version-01","text":"First public release of TPOT. 
Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Version 0.1"},{"location":"support/","text":"TPOT was developed in the Computational Genetics Lab at the University of Pennsylvania with funding from the NIH under grant R01 AI117694. We are incredibly grateful for the support of the NIH and the University of Pennsylvania during the development of this project. The TPOT logo was designed by Todd Newmuis, who generously donated his time to the project.","title":"Support"},{"location":"using/","text":"What to expect from AutoML software Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT. AutoML algorithms aren't intended to run for only a few minutes Of course, you can run TPOT for only a few minutes and it will find a reasonably good pipeline for your dataset. However, if you don't run TPOT for long enough, it may not find the best possible pipeline for your dataset. It may even not find any suitable pipeline at all, in which case a RuntimeError('A pipeline has not yet been optimized. Please call fit() first.') will be raised. Often it is worthwhile to run multiple instances of TPOT in parallel for a long time (hours to days) to allow TPOT to thoroughly search the pipeline space for your dataset. AutoML algorithms can take a long time to finish their search AutoML algorithms aren't as simple as fitting one model on the dataset; they are considering multiple machine learning algorithms (random forests, linear models, SVMs, etc.) in a pipeline with multiple preprocessing steps (missing value imputation, scaling, PCA, feature selection, etc.), the hyperparameters for all of the models and preprocessing steps, as well as multiple ways to ensemble or stack the algorithms within the pipeline. As such, TPOT will take a while to run on larger datasets, but it's important to realize why. With the default TPOT settings (100 generations with 100 population size), TPOT will evaluate 10,000 pipeline configurations before finishing. To put this number into context, think about a grid search of 10,000 hyperparameter combinations for a machine learning algorithm and how long that grid search will take. That is 10,000 model configurations to evaluate with 10-fold cross-validation, which means that roughly 100,000 models are fit and evaluated on the training data in one grid search. That's a time-consuming procedure, even for simpler models like decision trees. Typical TPOT runs will take hours to days to finish (unless it's a small dataset), but you can always interrupt the run partway through and see the best results so far. TPOT also provides a warm_start parameter that lets you restart a TPOT run from where it left off. AutoML algorithms can recommend different solutions for the same dataset If you're working with a reasonably complex dataset or run TPOT for a short amount of time, different TPOT runs may result in different pipeline recommendations. TPOT's optimization algorithm is stochastic in nature, which means that it uses randomness (in part) to search the possible pipeline space. When two TPOT runs recommend different pipelines, this means that the TPOT runs didn't converge due to lack of time or that multiple pipelines perform more-or-less the same on your dataset. 
This is actually an advantage over fixed grid search techniques: TPOT is meant to be an assistant that gives you ideas on how to solve a particular machine learning problem by exploring pipeline configurations that you might have never considered, then leaves the fine-tuning to more constrained parameter tuning techniques such as grid search. TPOT with code We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets. TPOT on the command line To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. 
-o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. 
Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. 
--no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit. Scoring functions TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module. Built-in TPOT configurations TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. 
To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Customizing TPOT's operators and parameters Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. 
For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers. Template option in TPOT The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of using the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for TPOTClassifier 's template but Regressor for TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes those subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for use in the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'. FeatureSetSelector in TPOT FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format.
In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in the set, and the 3rd column is a list of feature names (if the input X is a pandas.DataFrame) or indexes (if the input X is a numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of index of subset in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y) Pipeline caching in TPOT With the memory parameter, pipelines can cache the results of each transformer after fitting them. This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore. Crash/freeze issue with n_jobs > 1 under OSX or Linux Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. But it may crash/freeze with n_jobs > 1 under OSX or Linux as scikit-learn does , especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .
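As a concrete illustration, a minimal sketch of a full TPOT run guarded this way might look like the following; the digits dataset and the specific parameter values are only illustrative assumptions, not a prescribed setup:

import multiprocessing
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

if __name__ == '__main__':
    # switch from the default 'fork' to 'forkserver' before any parallel work starts
    multiprocessing.set_start_method('forkserver')
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25)
    # with forkserver enabled, n_jobs > 1 is less likely to freeze on OSX or Linux
    tpot = TPOTClassifier(generations=5, population_size=20, n_jobs=-1, verbosity=2)
    tpot.fit(X_train, y_train)
    print(tpot.score(X_test, y_test))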
Parallel Training with Dask For large problems, or when working in a Jupyter notebook, we highly recommend distributing the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10 * n_jobs if it is less than the offspring size) of parallel training. estimator = TPOTClassifier(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend. You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('scheduler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Using TPOT"},{"location":"using/#what-to-expect-from-automl-software","text":"Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT.","title":"What to expect from AutoML software"},{"location":"using/#tpot-with-code","text":"We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation. Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file.
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets.","title":"TPOT with code"},{"location":"using/#tpot-on-the-command-line","text":"To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. -o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. 
TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. 
See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. --no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit.","title":"TPOT on the command line"},{"location":"using/#scoring-functions","text":"TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. 
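For illustration, here is a minimal sketch of this (deprecated) usage that passes a plain metric function straight to the scoring parameter; it reuses the my_custom_accuracy metric and digits data from the example above, and the generation and population sizes are arbitrary placeholders:
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                     train_size=0.75, test_size=0.25)

# A plain metric function with the score_func(y_true, y_pred) signature
def my_custom_accuracy(y_true, y_pred):
    return float(sum(y_pred == y_true)) / len(y_true)

# Passing the metric function directly; TPOT wraps it in a scorer internally and
# emits a deprecation warning, since this style is scheduled for removal in 0.11.
tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      scoring=my_custom_accuracy)
tpot.fit(X_train, y_train)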
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module.","title":"Scoring functions"},{"location":"using/#built-in-tpot-configurations","text":"TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). 
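On the command line, the equivalent sketch (mirroring the example command shown earlier, with the data path and remaining flags as placeholders) passes the configuration name to -config:
tpot data/mnist.csv -is , -target class -config 'TPOT light' -g 5 -p 20 -cv 5 -s 42 -v 2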
For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py')","title":"Built-in TPOT configurations"},{"location":"using/#customizing-tpots-operators-and-parameters","text":"Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. 
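For illustration, a minimal sketch of what such a tpot_classifier_config.py file could contain, reusing the simple naive Bayes configuration above; remember that the dictionary must be assigned to a variable named tpot_config so that TPOT can locate it:
# Contents of tpot_classifier_config.py
# The custom configuration must be named 'tpot_config' for the command-line interface.
tpot_config = {
    'sklearn.naive_bayes.GaussianNB': {
    },
    'sklearn.naive_bayes.BernoulliNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    },
    'sklearn.naive_bayes.MultinomialNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    }
}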
Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers.","title":"Customizing TPOT's operators and parameters"},{"location":"using/#template-option-in-tpot","text":"The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of how to use the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for TPOTClassifier 's template but Regressor for TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes those subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'.","title":"Template option in TPOT"},{"location":"using/#featuresetselector-in-tpot","text":"FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format. In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in the set and the 3rd column is a list of feature names (if input X is pandas.DataFrame) or indexes (if input X is numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of indices of subsets in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y)","title":"FeatureSetSelector in TPOT"},{"location":"using/#pipeline-caching-in-tpot","text":"With the memory parameter, pipelines can cache the results of each transformer after fitting them.
This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to those of another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore.","title":"Pipeline caching in TPOT"},{"location":"using/#crashfreeze-issue-with-n_jobs-1-under-osx-or-linux","text":"Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. But it may crash/freeze with n_jobs > 1 under OSX or Linux, as scikit-learn does, especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .","title":"Crash/freeze issue with n_jobs > 1 under OSX or Linux"},{"location":"using/#parallel-training-with-dask","text":"For large problems, or when working in a Jupyter notebook, we highly recommend that you distribute the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10 * n_jobs if it is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend.
You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('scheduler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Parallel Training with Dask"}]} \ No newline at end of file diff --git a/docs/using/index.html b/docs/using/index.html index 1a3e0f7a..94af489a 100644 --- a/docs/using/index.html +++ b/docs/using/index.html @@ -519,7 +519,7 @@
Scoring functions
scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py')@@ -595,7 +595,7 @@
Built-in TPOT configurations
config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') @@ -647,7 +647,7 @@Customizing TPOT's operators config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py')
Command-line users must create a separate
diff --git a/docs_sources/api.md b/docs_sources/api.md index a1106417..473509ac 100644 --- a/docs_sources/api.md +++ b/docs_sources/api.md @@ -268,7 +268,7 @@ X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` Functions diff --git a/docs_sources/examples.md b/docs_sources/examples.md index 9096fdfa..e83a7f00 100644 --- a/docs_sources/examples.md +++ b/docs_sources/examples.md @@ -6,7 +6,7 @@ belonging to a typical class of machine learning tasks. | Dataset | Task | Task class | Dataset description | Jupyter notebook | | ------- | ----------------------- | ---------------------- |:-------------------:|:------------------------------------------------------------------------------------------:| | Iris | flower classification | classification | [link](https://archive.ics.uci.edu/ml/datasets/iris) | [link](https://github.com/EpistasisLab/tpot/blob/master/tutorials/IRIS.ipynb) | -| MNIST | digit recognition | (image) classification | [link](https://yann.lecun.com/exdb/mnist/) | [link](https://github.com/EpistasisLab/tpot/blob/master/tutorials/MNIST.ipynb) | +| Optical Recognition of Handwritten Digits | digit recognition | (image) classification | [link](https://scikit-learn.org/stable/datasets/index.html#digits-dataset) | [link](https://github.com/EpistasisLab/tpot/blob/master/tutorials/Digits.ipynb) | | Boston | housing prices modeling | regression | [link](https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html) | N/A | | Titanic | survival analysis | classification | [link](https://www.kaggle.com/c/titanic/data) | [link](https://github.com/EpistasisLab/tpot/blob/master/tutorials/Titanic_Kaggle.ipynb) | | Bank Marketing | subscription prediction | classification | [link](https://archive.ics.uci.edu/ml/datasets/Bank+Marketing) | [link](https://github.com/EpistasisLab/tpot/blob/master/tutorials/Portuguese%20Bank%20Marketing/Portuguese%20Bank%20Marketing%20Stratergy.ipynb) | @@ -28,9 +28,9 @@ import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), - iris.target.astype(np.float64), train_size=0.75, test_size=0.25) + iris.target.astype(np.float64), train_size=0.75, test_size=0.25, random_state=42) -tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) +tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') @@ -40,31 +40,34 @@ Running this code should discover a pipeline (exported as `tpot_iris_pipeline.py ```Python import numpy as np - +import pandas as pd from sklearn.model_selection import train_test_split -from sklearn.naive_bayes import GaussianNB +from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer +from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), - tpot_data.dtype.names.index('class'), axis=1) +# NOTE: Make sure that the class is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', 
sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=42) +# Average CV score on the training set was: 0.9826086956521738 exported_pipeline = make_pipeline( - Normalizer(), - GaussianNB() + Normalizer(norm="l2"), + KNeighborsClassifier(n_neighbors=5, p=2, weights="distance") ) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) ``` -## MNIST digit recognition +## Digits dataset -Below is a minimal working example with the practice MNIST dataset, which is an _image classification problem_. +Below is a minimal working example with the optical recognition of handwritten digits dataset, which is an _image classification problem_. ```Python from tpot import TPOTClassifier @@ -73,30 +76,41 @@ from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, - train_size=0.75, test_size=0.25) + train_size=0.75, test_size=0.25, random_state=42) -tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) +tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` -Running this code should discover a pipeline (exported as `tpot_mnist_pipeline.py`) that achieves about 98% test accuracy: +Running this code should discover a pipeline (exported as `tpot_digits_pipeline.py`) that achieves about 98% test accuracy: ```Python import numpy as np - +import pandas as pd +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier - -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), - tpot_data.dtype.names.index('class'), axis=1) +from sklearn.pipeline import make_pipeline, make_union +from sklearn.preprocessing import PolynomialFeatures +from tpot.builtins import StackingEstimator +from tpot.export_utils import set_param_recursive + +# NOTE: Make sure that the class is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=42) -exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights="distance") +# Average CV score on the training set was: 0.9799428471757372 +exported_pipeline = make_pipeline( + PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), + StackingEstimator(estimator=LogisticRegression(C=0.1, dual=False, penalty="l1")), + RandomForestClassifier(bootstrap=True, criterion="entropy", max_features=0.35000000000000003, min_samples_leaf=20, 
min_samples_split=19, n_estimators=100) +) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) @@ -113,9 +127,9 @@ from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, - train_size=0.75, test_size=0.25) + train_size=0.75, test_size=0.25, random_state=42) -tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) +tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2, random_state=42) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') @@ -125,20 +139,26 @@ Running this code should discover a pipeline (exported as `tpot_boston_pipeline. ```Python import numpy as np - -from sklearn.ensemble import GradientBoostingRegressor +import pandas as pd +from sklearn.ensemble import ExtraTreesRegressor from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures +from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), - tpot_data.dtype.names.index('class'), axis=1) +# NOTE: Make sure that the class is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=42) -exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss="ls", - max_features=0.9, min_samples_leaf=5, - min_samples_split=6) +# Average CV score on the training set was: -10.812040755234403 +exported_pipeline = make_pipeline( + PolynomialFeatures(degree=2, include_bias=False, interaction_only=False), + ExtraTreesRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=2, min_samples_split=3, n_estimators=100) +) +# Fix random state for all the steps in exported pipeline +set_param_recursive(exported_pipeline.steps, 'random_state', 42) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) diff --git a/docs_sources/using.md b/docs_sources/using.md index a8a55785..82ed0efc 100644 --- a/docs_sources/using.md +++ b/docs_sources/using.md @@ -384,7 +384,7 @@ tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` - You can pass a metric function with the signature `score_func(y_true, y_pred)` (e.g. `my_custom_accuracy` in the example above), where `y_true` are the true target values and `y_pred` are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. 
TPOT assumes that any function with "error" or "loss" in the function name is meant to be minimized (`greater_is_better=False` in [`make_scorer`](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html)), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. @@ -461,7 +461,7 @@ tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` @@ -520,7 +520,7 @@ tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) -tpot.export('tpot_mnist_pipeline.py') +tpot.export('tpot_digits_pipeline.py') ``` Command-line users must create a separate `.py` file with the custom configuration and provide the path to the file to the `tpot` call. For example, if the simple example configuration above is saved in `tpot_classifier_config.py`, that configuration could be used on the command line with the command: diff --git a/tests/export_tests.py b/tests/export_tests.py index 5b715e30..59f9f27d 100644 --- a/tests/export_tests.py +++ b/tests/export_tests.py @@ -51,9 +51,9 @@ classifier_config_dict[test_operator_key_2] ) -mnist_data = load_digits() +digits_data = load_digits() training_features, testing_features, training_target, testing_target = \ - train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42) + train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42) tpot_obj = TPOTClassifier() tpot_obj._fit_init() @@ -75,9 +75,9 @@ def test_export_random_ind(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=39) + train_test_split(features, tpot_data['target'], random_state=39) exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False) # Fix random state for all the steps in exported pipeline @@ -130,9 +130,9 @@ def test_export_2(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") @@ -323,9 +323,9 @@ def test_export_pipeline(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) 
exported_pipeline = make_pipeline( make_union( @@ -360,9 +360,9 @@ def test_export_pipeline_2(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") @@ -391,9 +391,9 @@ def test_export_pipeline_3(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( SelectPercentile(score_func=f_classif, percentile=20), @@ -431,9 +431,9 @@ def test_export_pipeline_4(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( make_union( @@ -468,9 +468,9 @@ def test_export_pipeline_5(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( SelectFromModel(estimator=ExtraTreesRegressor(max_features=0.05, n_estimators=100), threshold=0.05), @@ -503,9 +503,9 @@ def test_export_pipeline_6(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=42) + train_test_split(features, tpot_data['target'], random_state=42) exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform") # Fix random state for all the steps in exported pipeline @@ -598,9 +598,9 @@ def test_pipeline_score_save(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, 
tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) # Average CV score on the training set was: 0.929813743 exported_pipeline = make_pipeline( @@ -652,9 +652,9 @@ def test_imputer_in_export(): # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) imputer = Imputer(strategy="median") imputer.fit(training_features) diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index 490628e4..0088833c 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -83,10 +83,10 @@ def closing(arg): else: from contextlib import closing -# Set up the MNIST data set for testing -mnist_data = load_digits() +# Set up the digits data set for testing +digits_data = load_digits() training_features, testing_features, training_target, testing_target = \ - train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42) + train_test_split(digits_data.data.astype(np.float64), digits_data.target.astype(np.float64), random_state=42) # Set up test data with missing value features_with_nan = np.copy(training_features) @@ -713,7 +713,7 @@ def test_template_4(): def test_fit_GroupKFold(): """Assert that TPOT properly handles the group parameter when using GroupKFold.""" - # This check tests if the darker MNIST images would generalize to the lighter ones. + # This check tests if the darker digits images would generalize to the lighter ones. 
means = np.mean(training_features, axis=1) groups = means >= np.median(means) diff --git a/tpot/base.py b/tpot/base.py index 61ec6c39..7bfc56e5 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -1122,7 +1122,8 @@ def export(self, output_file_name='', data_file_path=''): if output_file_name is not '': with open(output_file_name, 'w') as output_file: output_file.write(to_write) - return to_write + else: + return to_write def _impute_values(self, features): diff --git a/tpot/export_utils.py b/tpot/export_utils.py index b2e373f1..4cf8db47 100644 --- a/tpot/export_utils.py +++ b/tpot/export_utils.py @@ -98,9 +98,9 @@ def export_pipeline(exported_pipeline, pipeline_text += """ # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('{}', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ - train_test_split(features, tpot_data['target'].values, random_state={}) + train_test_split(features, tpot_data['target'], random_state={}) """.format(data_file_path, random_state) # Add the imputation step if it was used by TPOT diff --git a/tutorials/MNIST.ipynb b/tutorials/Digits.ipynb similarity index 98% rename from tutorials/MNIST.ipynb rename to tutorials/Digits.ipynb index 8c4387c2..ff4992a5 100644 --- a/tutorials/MNIST.ipynb +++ b/tutorials/Digits.ipynb @@ -198,7 +198,7 @@ }, "outputs": [], "source": [ - "tpot.export('tpot_mnist_pipeline.py')" + "tpot.export('tpot_digits_pipeline.py')" ] }, { @@ -211,7 +211,7 @@ }, "outputs": [], "source": [ - "# %load tpot_mnist_pipeline.py\n", + "# %load tpot_digits_pipeline.py\n", "import numpy as np\n", "\n", "from sklearn.model_selection import train_test_split\n", diff --git a/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb b/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb index 3dee4074..4eb400af 100644 --- a/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb +++ b/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb @@ -934,9 +934,9 @@ "\n", "# NOTE: Make sure that the class is labeled 'target' in the data file\n", "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", - "features = tpot_data.drop('target', axis=1).values\n", + "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_target, testing_target = \\\n", - " train_test_split(features, tpot_data['target'].values, random_state=None)\n", + " train_test_split(features, tpot_data['target'], random_state=None)\n", "\n", "# Average CV score on the training set was:0.853347788745\n", "exported_pipeline = make_pipeline(\n", diff --git a/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py b/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py index 9fc55dae..388f04e3 100644 --- a/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py +++ b/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py @@ -8,9 +8,9 @@ # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, 
tpot_data['target'], random_state=None) # Average CV score on the training set was:0.853347788745 exported_pipeline = make_pipeline( diff --git a/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb b/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb index 005b2f99..cd4c9713 100644 --- a/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb +++ b/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb @@ -925,9 +925,9 @@ "\n", "# NOTE: Make sure that the class is labeled 'target' in the data file\n", "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", - "features = tpot_data.drop('target', axis=1).values\n", + "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_target, testing_target = \\\n", - " train_test_split(features, tpot_data['target'].values, random_state=None)\n", + " train_test_split(features, tpot_data['target'], random_state=None)\n", "\n", "# Average CV score on the training set was:0.913728927925\n", "exported_pipeline = DecisionTreeClassifier(criterion=\"gini\", max_depth=5, min_samples_leaf=16, min_samples_split=8)\n", diff --git a/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py b/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py index da8b3a78..5e737569 100644 --- a/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py +++ b/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py @@ -5,9 +5,9 @@ # NOTE: Make sure that the class is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) -features = tpot_data.drop('target', axis=1).values +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['target'].values, random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) # Average CV score on the training set was:0.913728927925 exported_pipeline = DecisionTreeClassifier(criterion="gini", max_depth=5, min_samples_leaf=16, min_samples_split=8) From 474d15983f255906073e563932332c34eeb6e60d Mon Sep 17 00:00:00 2001 From: weixuanfu.py
file with the custom configuration and provide the path to the file to thetpot
call. For example, if the simple example configuration above is saved intpot_classifier_config.py
, that configuration could be used on the command line with the command:Date: Tue, 5 Nov 2019 09:51:30 -0500 Subject: [PATCH 27/44] update docs #947 --- README.md | 4 ++-- docs/examples/index.html | 18 ++++++++--------- docs/search/search_index.json | 2 +- docs_sources/examples.md | 6 +++--- tests/export_tests.py | 20 +++++++++---------- tpot/export_utils.py | 2 +- tutorials/Digits.ipynb | 10 +++++----- tutorials/IRIS.ipynb | 10 +++++----- .../MAGIC Gamma Telescope.ipynb | 2 +- .../tpot_MAGIC_Gamma_Telescope_pipeline.py | 2 +- .../Portuguese Bank Marketing Strategy.ipynb | 2 +- .../tpot_marketing_pipeline.py | 2 +- tutorials/Titanic_Kaggle.ipynb | 10 +++++----- tutorials/tpot_iris_pipeline.py | 10 +++++----- tutorials/tpot_mnist_pipeline.py | 10 +++++----- tutorials/tpot_titanic_pipeline.py | 10 +++++----- 16 files changed, 60 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 40251369..949af676 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ from sklearn.preprocessing import PolynomialFeatures from tpot.builtins import StackingEstimator from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ @@ -134,7 +134,7 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import PolynomialFeatures from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ diff --git a/docs/examples/index.html b/docs/examples/index.html index 5ceffe11..063c117a 100644 --- a/docs/examples/index.html +++ b/docs/examples/index.html @@ -243,12 +243,12 @@ Iris flower classification
from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), @@ -281,12 +281,12 @@MNIST digit recognition
from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights="distance") @@ -316,12 +316,12 @@Boston housing prices modeling
from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss="ls", max_features=0.9, min_samples_leaf=5, diff --git a/docs/search/search_index.json b/docs/search/search_index.json index b0e5bcf8..ee592d04 100644 --- a/docs/search/search_index.json +++ b/docs/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Consider TPOT your Data Science Assistant . TPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming. TPOT will automate the most tedious part of machine learning by intelligently exploring thousands of possible pipelines to find the best one for your data. An example machine learning pipeline Once TPOT is finished searching (or you get tired of waiting), it provides you with the Python code for the best pipeline it found so you can tinker with the pipeline from there. An example TPOT pipeline TPOT is built on top of scikit-learn, so all of the code it generates should look familiar... if you're familiar with scikit-learn, anyway. TPOT is still under active development and we encourage you to check back on this repository regularly for updates.","title":"Home"},{"location":"api/","text":"Classification class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. 
TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. 
max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. 
More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimizations. This avoids re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save the best pipelines on the Pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: sudden death before TPOT could save an optimized pipeline, tracking its progress, or grabbing pipelines while it is still optimizing. early_stop : integer, optional (default: None) How many generations TPOT checks for improvement in the optimization process. TPOT ends the optimization process if there is no improvement within the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. 
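The configuration-related options above can be combined when constructing a TPOT estimator. The sketch below is illustrative only and relies solely on the documented parameters; the checkpoint folder name is a placeholder:
from tpot import TPOTClassifier

tpot = TPOTClassifier(
    config_dict='TPOT light',                            # built-in configuration with only fast models and preprocessors
    template='SelectPercentile-Transformer-Classifier',  # linear pipeline structure, steps delimited by '-'
    memory='auto',                                       # cache fitted transformers in a temporary directory
    periodic_checkpoint_folder='tpot_checkpoints',       # placeholder path for saving Pareto-front pipelines
    early_stop=10,                                       # stop if 10 generations pass without improvement
    verbosity=2,
)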
Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. 
Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything Regression class tpot. TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to run the pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. 
This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. 
Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default, the value of template is None and TPOT generates tree-based pipelines randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, the pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during the optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimizations. This avoids re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save the best pipelines on the Pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. 
Useful in multiple cases: sudden death before TPOT could save an optimized pipeline, tracking its progress, or grabbing pipelines while it is still optimizing. early_stop : integer, optional (default: None) How many generations TPOT checks for improvement in the optimization process. TPOT ends the optimization process if there is no improvement within the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split digits = load_boston() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. 
As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"TPOT API"},{"location":"api/#classification","text":"class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to run the pipeline optimization process. Must be a positive number. 
Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. 
Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. 
This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. 
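For the memory and warm_start options described above, one possible usage is sketched below; this assumes an explicit joblib.Memory instance is wanted, and the cache directory is a placeholder that TPOT will not clean up:
from tempfile import mkdtemp
from joblib import Memory
from tpot import TPOTClassifier

cachedir = mkdtemp()                           # placeholder caching directory; TPOT will NOT clean it up on shutdown
memory = Memory(location=cachedir, verbose=0)  # joblib.Memory instance passed straight to TPOT

tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    memory=memory,       # reuse fitted transformers across identical pipeline steps
    warm_start=True,     # a later fit() call reuses the existing population
    verbosity=2,
)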
Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. 
Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Classification"},{"location":"api/#regression","text":"class tpot. TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. 
This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. 
Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. 
Useful in multiple cases: sudden death before TPOT could save an optimized pipeline, tracking its progress, or grabbing pipelines while it is still optimizing. early_stop : integer, optional (default: None) How many generations TPOT checks for improvement in the optimization process. TPOT ends the optimization process if there is no improvement within the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split digits = load_boston() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. 
As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Regression"},{"location":"citing/","text":"If you use TPOT in a scientific publication, please consider citing at least one of the following papers: Randal S. Olson, Ryan J. Urbanowicz, Peter C. Andrews, Nicole A. Lavender, La Creis Kidd, and Jason H. Moore (2016). Automating biomedical data science through tree-based pipeline optimization . Applications of Evolutionary Computation , pages 123-137. BibTeX entry: @inbook{Olson2016EvoBio, author={Olson, Randal S. and Urbanowicz, Ryan J. and Andrews, Peter C. and Lavender, Nicole A. and Kidd, La Creis and Moore, Jason H.}, editor={Squillero, Giovanni and Burelli, Paolo}, chapter={Automating Biomedical Data Science Through Tree-Based Pipeline Optimization}, title={Applications of Evolutionary Computation: 19th European Conference, EvoApplications 2016, Porto, Portugal, March 30 -- April 1, 2016, Proceedings, Part I}, year={2016}, publisher={Springer International Publishing}, pages={123--137}, isbn={978-3-319-31204-0}, doi={10.1007/978-3-319-31204-0_9}, url={http://dx.doi.org/10.1007/978-3-319-31204-0_9} } Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science Randal S. Olson, Nathan Bartley, Ryan J. Urbanowicz, and Jason H. Moore (2016). Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science . Proceedings of GECCO 2016 , pages 485-492. 
BibTeX entry: @inproceedings{OlsonGECCO2016, author = {Olson, Randal S. and Bartley, Nathan and Urbanowicz, Ryan J. and Moore, Jason H.}, title = {Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science}, booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference 2016}, series = {GECCO '16}, year = {2016}, isbn = {978-1-4503-4206-3}, location = {Denver, Colorado, USA}, pages = {485--492}, numpages = {8}, url = {http://doi.acm.org/10.1145/2908812.2908918}, doi = {10.1145/2908812.2908918}, acmid = {2908918}, publisher = {ACM}, address = {New York, NY, USA}, } Alternatively, you can cite the repository directly with the following DOI:","title":"Citing"},{"location":"contributing/","text":"We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT, please file a new issue so we can discuss it. Project layout The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch. How to contribute The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. 
Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.) Before submitting your pull request Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh After submitting your pull request After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"Contributing"},{"location":"contributing/#project-layout","text":"The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. 
In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch.","title":"Project layout"},{"location":"contributing/#how-to-contribute","text":"The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.)","title":"How to contribute"},{"location":"contributing/#before-submitting-your-pull-request","text":"Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. 
If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh","title":"Before submitting your pull request"},{"location":"contributing/#after-submitting-your-pull-request","text":"After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"After submitting your pull request"},{"location":"examples/","text":"Overview The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here. Iris flower classification The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. 
from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) MNIST digit recognition Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Boston housing prices modeling The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. 
from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Titanic survival analysis To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT. Portuguese Bank Marketing The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here . MAGIC Gamma Telescope The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Examples"},{"location":"examples/#overview","text":"The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here.","title":"Overview"},{"location":"examples/#iris-flower-classification","text":"The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. 
from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Iris flower classification"},{"location":"examples/#mnist-digit-recognition","text":"Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"MNIST digit recognition"},{"location":"examples/#boston-housing-prices-modeling","text":"The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. 
from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the class is labeled 'class' in the data file tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Boston housing prices modeling"},{"location":"examples/#titanic-survival-analysis","text":"To see TPOT applied to the Titanic Kaggle dataset, see the Jupyter notebook here. This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT.","title":"Titanic survival analysis"},{"location":"examples/#portuguese-bank-marketing","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here.","title":"Portuguese Bank Marketing"},{"location":"examples/#magic-gamma-telescope","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here.","title":"MAGIC Gamma Telescope"},{"location":"installing/","text":"TPOT is built on top of several existing Python libraries, including: NumPy SciPy scikit-learn DEAP update_checker tqdm stopit pandas joblib Most of the necessary Python packages can be installed via the Anaconda Python distribution, which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. NumPy, SciPy, scikit-learn, pandas and joblib can be installed in Anaconda via the command: conda install numpy scipy scikit-learn pandas joblib DEAP, update_checker, tqdm and stopit can be installed with pip via the command: pip install deap update_checker tqdm stopit For Windows users, the pywin32 module is required if Python is NOT installed via the Anaconda Python distribution and can be installed with pip for Python version <=3.3 or conda (e.g. miniconda) for any Python version: conda install pywin32 Optionally, you can install XGBoost if you would like TPOT to use the eXtreme Gradient Boosting models. XGBoost is entirely optional, and TPOT will still function normally without XGBoost if you do not have it installed. Windows users: pip installation may not work on some Windows environments, and it may cause unexpected errors. pip install xgboost If you have issues installing XGBoost, check the XGBoost installation documentation . 
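If you want to confirm that the optional XGBoost dependency is actually visible to the Python environment you will run TPOT in, a quick check like the sketch below can help; nothing here is TPOT-specific, it only tests that the package imports.

# Quick sanity check that the optional XGBoost dependency is importable.
try:
    import xgboost
    print('XGBoost version:', xgboost.__version__)
except ImportError:
    print('XGBoost is not installed; TPOT will simply not use XGBoost-based operators.')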
If you plan to use Dask for parallel training, make sure to install dask[delayed] and dask-ml. pip install dask[delayed] dask-ml If you plan to use the TPOT-MDR configuration, make sure to install scikit-mdr and scikit-rebate: pip install scikit-mdr skrebate Finally, to install TPOT itself, run the following command: pip install tpot Please file a new issue if you run into installation problems.","title":"Installation"},{"location":"related/","text":"Other Automated Machine Learning (AutoML) tools and related projects: Name Language License Description Auto-WEKA Java GPL-v3 Automated model selection and hyper-parameter tuning for Weka models. auto-sklearn Python BSD-3-Clause An automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator. auto_ml Python MIT Automated machine learning for analytics & production. Supports manual feature type declarations. H2O AutoML Java with Python, Scala & R APIs and web GUI Apache 2.0 Automated: data prep, hyperparameter tuning, random grid search and stacked ensembles in a distributed ML platform. devol Python MIT Automated deep neural network design via genetic programming. MLBox Python BSD-3-Clause Accurate hyper-parameter optimization in high-dimensional space with support for distributed computing. Recipe C GPL-v3 Machine-learning pipeline optimization through genetic programming. Uses grammars to define pipeline structure. Xcessiv Python Apache 2.0 A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python. GAMA Python Apache 2.0 Machine-learning pipeline optimization through asynchronous evaluation based genetic programming.","title":"Related"},{"location":"releases/","text":"Version 0.9 TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder, that allows TPOT to periodically save the best pipeline so far to a local folder during the optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from. We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv. NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug where DEFAULT in the parameter(s) of a nested estimator raised a KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipelines containing SelectFromModel (with ExtraTreesClassifier as the nested estimator) or StackingEstimator if the nested estimator has a random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead of rows. Refined input checking for sparse matrices in TPOT. Refined the TPOT pipeline mutation operator. Version 0.8 TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. 
TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample=0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations, including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_, pareto_front_fitted_pipelines_, and evaluated_individuals_. These attributes are described in the API documentation. Oh, TPOT now has thorough API documentation. Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 non-compliant code. Version 0.7 TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process, which can be accomplished with the new config_dict parameter. The format of this customized dictionary can be found in the online documentation, along with a list of built-in configurations. TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in the optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm. This algorithm gives you more control over how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some of your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights). The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance. Version 0.6 TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you no longer need to guess how long TPOT will take to recommend a pipeline to you. Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. 
TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score. Version 0.5 Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes. Version 0.4 In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions Version 0.3 We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over. Version 0.2 TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline. Version 0.1 First public release of TPOT. Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Release Notes"},{"location":"releases/#version-09","text":"TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. 
TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. Refined the TPOT pipeline mutation operator.","title":"Version 0.9"},{"location":"releases/#version-08","text":"TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code.","title":"Version 0.8"},{"location":"releases/#version-07","text":"TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . 
This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance.","title":"Version 0.7"},{"location":"releases/#version-06","text":"TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score.","title":"Version 0.6"},{"location":"releases/#version-05","text":"Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes.","title":"Version 0.5"},{"location":"releases/#version-04","text":"In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. 
Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions","title":"Version 0.4"},{"location":"releases/#version-03","text":"We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over.","title":"Version 0.3"},{"location":"releases/#version-02","text":"TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline.","title":"Version 0.2"},{"location":"releases/#version-01","text":"First public release of TPOT. Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Version 0.1"},{"location":"support/","text":"TPOT was developed in the Computational Genetics Lab at the University of Pennsylvania with funding from the NIH under grant R01 AI117694. We are incredibly grateful for the support of the NIH and the University of Pennsylvania during the development of this project. The TPOT logo was designed by Todd Newmuis, who generously donated his time to the project.","title":"Support"},{"location":"using/","text":"What to expect from AutoML software Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT. AutoML algorithms aren't intended to run for only a few minutes Of course, you can run TPOT for only a few minutes and it will find a reasonably good pipeline for your dataset. However, if you don't run TPOT for long enough, it may not find the best possible pipeline for your dataset. It may even not find any suitable pipeline at all, in which case a RuntimeError('A pipeline has not yet been optimized. Please call fit() first.') will be raised. Often it is worthwhile to run multiple instances of TPOT in parallel for a long time (hours to days) to allow TPOT to thoroughly search the pipeline space for your dataset. 
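As a concrete illustration of the failure mode mentioned above, the following sketch (which assumes hypothetical X_train, y_train, X_test and y_test variables already exist) runs TPOT under a very tight time budget and falls back gracefully if no pipeline could be evaluated in time:

from tpot import TPOTClassifier

# Hypothetical data: X_train, y_train, X_test, y_test are assumed to be defined elsewhere.
tpot = TPOTClassifier(max_time_mins=2, population_size=20, verbosity=2)
tpot.fit(X_train, y_train)

try:
    print(tpot.score(X_test, y_test))
    tpot.export('tpot_short_run_pipeline.py')
except RuntimeError as e:
    # Raised when no pipeline finished evaluating within the time budget.
    print(e)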
AutoML algorithms can take a long time to finish their search AutoML algorithms aren't as simple as fitting one model on the dataset; they are considering multiple machine learning algorithms (random forests, linear models, SVMs, etc.) in a pipeline with multiple preprocessing steps (missing value imputation, scaling, PCA, feature selection, etc.), the hyperparameters for all of the models and preprocessing steps, as well as multiple ways to ensemble or stack the algorithms within the pipeline. As such, TPOT will take a while to run on larger datasets, but it's important to realize why. With the default TPOT settings (100 generations with 100 population size), TPOT will evaluate 10,000 pipeline configurations before finishing. To put this number into context, think about a grid search of 10,000 hyperparameter combinations for a machine learning algorithm and how long that grid search will take. That is 10,000 model configurations to evaluate with 10-fold cross-validation, which means that roughly 100,000 models are fit and evaluated on the training data in one grid search. That's a time-consuming procedure, even for simpler models like decision trees. Typical TPOT runs will take hours to days to finish (unless it's a small dataset), but you can always interrupt the run partway through and see the best results so far. TPOT also provides a warm_start parameter that lets you restart a TPOT run from where it left off. AutoML algorithms can recommend different solutions for the same dataset If you're working with a reasonably complex dataset or run TPOT for a short amount of time, different TPOT runs may result in different pipeline recommendations. TPOT's optimization algorithm is stochastic in nature, which means that it uses randomness (in part) to search the possible pipeline space. When two TPOT runs recommend different pipelines, this means that the TPOT runs didn't converge due to lack of time or that multiple pipelines perform more-or-less the same on your dataset. This is actually an advantage over fixed grid search techniques: TPOT is meant to be an assistant that gives you ideas on how to solve a particular machine learning problem by exploring pipeline configurations that you might have never considered, then leaves the fine-tuning to more constrained parameter tuning techniques such as grid search. TPOT with code We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. 
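For example (a minimal sketch that reuses the pipeline_optimizer fitted above; X_test is assumed to be a held-out feature matrix), the fitted instance can be used like any scikit-learn estimator, and the winning pipeline can be inspected directly:

# Predict with the fitted TPOT instance, just like a scikit-learn estimator.
predictions = pipeline_optimizer.predict(X_test)

# The best pipeline found by TPOT is exposed as a scikit-learn Pipeline object.
print(pipeline_optimizer.fitted_pipeline_)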
You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets. TPOT on the command line To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. -o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. 
-scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. 
If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. --no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit. Scoring functions TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized (greater_is_better=False in make_scorer), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from the sklearn.metrics module. Built-in TPOT configurations TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. 
The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Customizing TPOT's operators and parameters Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. 
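As a rough sketch of what such a file could contain (reusing the Naive Bayes configuration from the example above; the file name tpot_classifier_config.py matches the command-line example that follows), the configuration file only needs to assign the dictionary to a variable named tpot_config:

# tpot_classifier_config.py -- a custom TPOT configuration for the command-line interface.
# The dictionary must be assigned to a variable named tpot_config so TPOT can locate it.
tpot_config = {
    'sklearn.naive_bayes.GaussianNB': {
    },
    'sklearn.naive_bayes.BernoulliNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    },
    'sklearn.naive_bayes.MultinomialNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    }
}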
For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers. Template option in TPOT The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of using the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for a TPOTClassifier 's template but Regressor for a TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'. FeatureSetSelector in TPOT FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format. In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in that set and the 3rd column is a list of feature names (if input X is a pandas.DataFrame) or indexes (if input X is a numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details.
from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of indexes of subsets in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y) Pipeline caching in TPOT With the memory parameter, pipelines can cache the results of each transformer after fitting them. This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or a joblib.Memory object in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore. Crash/freeze issue with n_jobs > 1 under OSX or Linux Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. However, it may crash/freeze with n_jobs > 1 under OSX or Linux, as scikit-learn does , especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation . Parallel Training with Dask For large problems, or when working in a Jupyter notebook, we highly recommend distributing the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster.
If n_jobs is specified, then it will control the chunk size (10 * n_jobs , if that is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend. You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('scheduler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Using TPOT"},{"location":"using/#what-to-expect-from-automl-software","text":"Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT.","title":"What to expect from AutoML software"},{"location":"using/#tpot-with-code","text":"We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation. Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file.
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets.","title":"TPOT with code"},{"location":"using/#tpot-on-the-command-line","text":"To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. -o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. 
TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. 
See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. --no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit.","title":"TPOT on the command line"},{"location":"using/#scoring-functions","text":"TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. 
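As a minimal sketch of that usage (reusing my_custom_accuracy from the example above), a bare metric function can be passed to the scoring parameter directly, without wrapping it in make_scorer; note the caveats about this scoring type in the paragraph that follows:

from tpot import TPOTClassifier

# A metric function with the score_func(y_true, y_pred) signature described above
def my_custom_accuracy(y_true, y_pred):
    return float(sum(y_pred == y_true)) / len(y_true)

# Passing the metric function directly; TPOT infers whether to minimize or maximize
# it from the function name (see the note on 'error'/'loss' below).
tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      scoring=my_custom_accuracy)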
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module.","title":"Scoring functions"},{"location":"using/#built-in-tpot-configurations","text":"TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). 
For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py')","title":"Built-in TPOT configurations"},{"location":"using/#customizing-tpots-operators-and-parameters","text":"Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. 
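Custom configurations are not limited to scikit-learn's own estimators: any estimator or transformer that follows the scikit-learn API can be referenced by its full import path. As an illustrative sketch (the hyperparameter ranges below are examples chosen for this sketch, not TPOT's built-in defaults), an entry for XGBoost could look like:

tpot_config = {
    # Third-party estimator referenced by its full import path; it must follow
    # the scikit-learn API. Hyperparameter ranges here are illustrative only.
    'xgboost.XGBClassifier': {
        'n_estimators': [100],
        'max_depth': range(1, 11),
        'learning_rate': [1e-2, 1e-1, 0.5, 1.]
    }
}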
Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers.","title":"Customizing TPOT's operators and parameters"},{"location":"using/#template-option-in-tpot","text":"The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of using the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for a TPOTClassifier 's template but Regressor for a TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'.","title":"Template option in TPOT"},{"location":"using/#featuresetselector-in-tpot","text":"FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format. In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in that set and the 3rd column is a list of feature names (if input X is a pandas.DataFrame) or indexes (if input X is a numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set, a list of indexes of subsets in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y)","title":"FeatureSetSelector in TPOT"},{"location":"using/#pipeline-caching-in-tpot","text":"With the memory parameter, pipelines can cache the results of each transformer after fitting them.
This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or a joblib.Memory object in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore.","title":"Pipeline caching in TPOT"},{"location":"using/#crashfreeze-issue-with-n_jobs-1-under-osx-or-linux","text":"Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. However, it may crash/freeze with n_jobs > 1 under OSX or Linux, as scikit-learn does , especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .","title":"Crash/freeze issue with n_jobs > 1 under OSX or Linux"},{"location":"using/#parallel-training-with-dask","text":"For large problems, or when working in a Jupyter notebook, we highly recommend distributing the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10 * n_jobs , if that is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend.
You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('schedueler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Parallel Training with Dask"}]} \ No newline at end of file +{"config":{"lang":["en"],"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Consider TPOT your Data Science Assistant . TPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming. TPOT will automate the most tedious part of machine learning by intelligently exploring thousands of possible pipelines to find the best one for your data. An example machine learning pipeline Once TPOT is finished searching (or you get tired of waiting), it provides you with the Python code for the best pipeline it found so you can tinker with the pipeline from there. An example TPOT pipeline TPOT is built on top of scikit-learn, so all of the code it generates should look familiar... if you're familiar with scikit-learn, anyway. TPOT is still under active development and we encourage you to check back on this repository regularly for updates.","title":"Home"},{"location":"api/","text":"Classification class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. 
mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. 
random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. 
This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. 
Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything Regression class tpot. 
TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . 
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. 
If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. 
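As an illustration of how this attribute can be reused once fit() has finished, here is a minimal sketch on a synthetic dataset (the synthetic data and the tiny search budget are assumptions made purely to keep the example short):

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from tpot import TPOTRegressor

# Small synthetic regression dataset, purely for illustration.
X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, test_size=0.25)

tpot = TPOTRegressor(generations=2, population_size=10, verbosity=0, random_state=42)
tpot.fit(X_train, y_train)

# fitted_pipeline_ is a plain scikit-learn Pipeline fitted on the full training set,
# so it can be inspected, used for prediction, or persisted like any other estimator.
best_pipeline = tpot.fitted_pipeline_
print(best_pipeline.steps)
print(best_pipeline.predict(X_test[:5]))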
pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing all the pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validation to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect TPOT's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold .
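For example, group-aware cross-validation can be set up by passing a GroupKFold object as cv and the per-sample group labels to fit(); the synthetic data and group assignment below are assumptions for illustration only:

import numpy as np
from sklearn.model_selection import GroupKFold
from tpot import TPOTRegressor

X = np.random.rand(100, 5)
y = np.random.rand(100)
groups = np.repeat(np.arange(10), 10)  # 10 groups of 10 samples each

# With GroupKFold, samples that share a group label never end up in both the
# training and validation folds of the internal cross-validation.
tpot = TPOTRegressor(generations=2, population_size=10, cv=GroupKFold(n_splits=5), verbosity=0)
tpot.fit(X, y, groups=groups)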
Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTRegressor is 'neg_mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: score : float The estimated test set score according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"TPOT API"},{"location":"api/#classification","text":"class tpot. TPOTClassifier ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='accuracy', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised classification tasks. The TPOTClassifier performs an intelligent search over machine learning pipelines that can contain supervised classification models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTClassifier will also search over the hyperparameters of all objects in the pipeline. By default, TPOTClassifier will search over a broad range of supervised classification algorithms, transformers, and their parameters. However, the algorithms, transformers, and hyperparameters that the TPOTClassifier searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to run the pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations × offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0.
We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='accuracy') Function used to evaluate the quality of a given pipeline for the classification problem. The following built-in scoring functions can be used: 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss','precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc' If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a StratifiedKFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets. max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. 
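Putting several of the parameters above together, a time-budgeted, parallel, and reproducible search might be configured as in the following sketch (the specific values are illustrative, not recommendations):

from tpot import TPOTClassifier

tpot = TPOTClassifier(
    max_time_mins=60,       # stop the whole search after roughly one hour
    max_eval_time_mins=5,   # give up on any single pipeline that takes longer than 5 minutes
    n_jobs=-1,              # evaluate candidate pipelines on all available cores
    random_state=42,        # same seed + same data -> same resulting pipeline
    verbosity=2,
)
# tpot.fit(X_train, y_train) would then run within the configured time budget.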
config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTClassifier configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. 
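A sketch of the use_dask workflow is shown below; it assumes that dask, distributed, and dask-ml are installed, and the LocalCluster settings are arbitrary illustrative choices:

from dask.distributed import Client, LocalCluster
from tpot import TPOTClassifier

# Start a local Dask scheduler; with use_dask=True TPOT delegates pipeline
# evaluation to the active Dask client.
cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)

tpot = TPOTClassifier(generations=5, population_size=20, use_dask=True, n_jobs=-1, verbosity=2)
# tpot.fit(X_train, y_train) would then distribute the cross-validation work via Dask.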
periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: pareto_front_fitted_pipelines_ is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Functions fit (features, classes[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the classes for a feature set. predict_proba (features) Use the optimized pipeline to estimate the class probabilities for a feature set. score (testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, classes, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. 
This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. classes : array-like {n_samples} List of class labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the classes for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted classes for the samples in the feature matrix predict_proba(features) Use the optimized pipeline to estimate the class probabilities for a feature set. Note: This function will only work for pipelines whose final classifier supports the predict_proba function. TPOT will raise an error otherwise. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples, n_classes} The class probabilities of the input samples score(testing_features, testing_classes) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'accuracy'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_classes : array-like {n_samples} List of class labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Classification"},{"location":"api/#regression","text":"class tpot. 
TPOTRegressor ( generations =100, population_size =100, offspring_size =None, mutation_rate =0.9, crossover_rate =0.1, scoring ='neg_mean_squared_error', cv =5, subsample =1.0, n_jobs =1, max_time_mins =None, max_eval_time_mins =5, random_state =None, config_dict =None, template =None, warm_start =False, memory =None, use_dask =False, periodic_checkpoint_folder =None, early_stop =None, verbosity =0, disable_update_check =False ) source Automated machine learning for supervised regression tasks. The TPOTRegressor performs an intelligent search over machine learning pipelines that can contain supervised regression models, preprocessors, feature selection techniques, and any other estimator or transformer that follows the scikit-learn API . The TPOTRegressor will also search over the hyperparameters of all objects in the pipeline. By default, TPOTRegressor will search over a broad range of supervised regression models, transformers, and their hyperparameters. However, the models, transformers, and parameters that the TPOTRegressor searches over can be fully customized using the config_dict parameter. Read more in the User Guide . Parameters: generations : int, optional (default=100) Number of iterations to the run pipeline optimization process. Must be a positive number. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate population_size + generations \u00d7 offspring_size pipelines in total. population_size : int, optional (default=100) Number of individuals to retain in the genetic programming population every generation. Must be a positive number. Generally, TPOT will work better when you give it more individuals with which to optimize the pipeline. offspring_size : int, optional (default=None) Number of offspring to produce in each genetic programming generation. Must be a positive number. By default, the number of offspring is equal to the number of population size. mutation_rate : float, optional (default=0.9) Mutation rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the GP algorithm how many pipelines to apply random changes to every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. crossover_rate : float, optional (default=0.1) Crossover rate for the genetic programming algorithm in the range [0.0, 1.0]. This parameter tells the genetic programming algorithm how many pipelines to \"breed\" every generation. mutation_rate + crossover_rate cannot exceed 1.0. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. scoring : string or callable, optional (default='neg_mean_squared_error') Function used to evaluate the quality of a given pipeline for the regression problem. The following built-in scoring functions can be used: 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2' Note that we recommend using the neg version of mean squared error and related metrics so TPOT will minimize (instead of maximize) the metric. If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y) . If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred) . 
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. See the section on scoring functions for more details. cv : int, cross-validation generator, or an iterable, optional (default=5) Cross-validation strategy used when evaluating pipelines. Possible inputs: integer, to specify the number of folds in a KFold, An object to be used as a cross-validation generator, or An iterable yielding train/test splits. subsample : float, optional (default=1.0) Fraction of training samples that are used during the TPOT optimization process. Must be in the range (0.0, 1.0]. Setting subsample =0.5 tells TPOT to use a random subsample of half of the training data. This subsample will remain the same during the entire pipeline optimization process. n_jobs : integer, optional (default=1) Number of processes to use in parallel for evaluating pipelines during the TPOT optimization process. Setting n_jobs =-1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. Beware that using multiple processes on the same machine may cause memory issues for large datasets max_time_mins : integer or None, optional (default=None) How many minutes TPOT has to optimize the pipeline. If not None, this setting will override the generations parameter and allow TPOT to run until max_time_mins minutes elapse. max_eval_time_mins : float, optional (default=5) How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to evaluate more complex pipelines, but will also allow TPOT to run longer. Use this parameter to help prevent TPOT from wasting time on evaluating time-consuming pipelines. random_state : integer or None, optional (default=None) The seed of the pseudo random number generator used in TPOT. Use this parameter to make sure that TPOT will give you the same results each time you run it against the same data set with that seed. config_dict : Python dictionary, string, or None, optional (default=None) A configuration dictionary for customizing the operators and parameters that TPOT searches in the optimization process. Possible inputs are: Python dictionary, TPOT will use your custom configuration, string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors, or string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies, or string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices, or None, TPOT will use the default TPOTRegressor configuration. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. template : string (default=None) Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. 
If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Regressor\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. warm_start : boolean, optional (default=False) Flag indicating whether the TPOT instance will reuse the population from previous calls to fit() . Setting warm_start =True can be useful for running TPOT for a short time on a dataset, checking the results, then resuming the TPOT run from where it left off. memory : a joblib.Memory object or string, optional (default=None) If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. More details about memory caching in scikit-learn documentation Possible inputs are: String 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown, or Path of a caching directory, TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown, or Memory object, TPOT uses the instance of joblib.Memory for memory caching and TPOT does NOT clean the caching directory up upon shutdown, or None, TPOT does not use memory caching. use_dask : boolean, optional (default: False) Whether to use Dask-ML's pipeline optimiziations. This avoid re-fitting the same estimator on the same split of data multiple times. It will also provide more detailed diagnostics when using Dask's distributed scheduler. See avoid repeated work for more details. periodic_checkpoint_folder : path string, optional (default: None) If supplied, a folder in which TPOT will periodically save pipelines in pareto front so far while optimizing. Currently once per generation but not more often than once per 30 seconds. Useful in multiple cases: Sudden death before TPOT could save optimized pipeline Track its progress Grab pipelines while it's still optimizing early_stop : integer, optional (default: None) How many generations TPOT checks whether there is no improvement in optimization process. Ends the optimization process if there is no improvement in the given number of generations. verbosity : integer, optional (default=0) How much information TPOT communicates while it's running. Possible inputs are: 0, TPOT will print nothing, 1, TPOT will print minimal information, 2, TPOT will print more information and provide a progress bar, or 3, TPOT will print everything and provide a progress bar. disable_update_check : boolean, optional (default=False) Flag indicating whether the TPOT version checker should be disabled. The update checker will tell you when a new version of TPOT has been released. Attributes: fitted_pipeline_ : scikit-learn Pipeline object The best pipeline that TPOT discovered during the pipeline optimization process, fitted on the entire training dataset. 
pareto_front_fitted_pipelines_ : Python dictionary Dictionary containing the all pipelines on the TPOT Pareto front, where the key is the string representation of the pipeline and the value is the corresponding pipeline fitted on the entire training dataset. The TPOT Pareto front provides a trade-off between pipeline complexity (i.e., the number of steps in the pipeline) and the predictive performance of the pipeline. Note: _pareto_front_fitted_pipelines is only available when verbosity =3. evaluated_individuals_ : Python dictionary Dictionary containing all pipelines that were evaluated during the pipeline optimization process, where the key is the string representation of the pipeline and the value is a tuple containing (# of steps in pipeline, accuracy metric for the pipeline). This attribute is primarily for internal use, but may be useful for looking at the other pipelines that TPOT evaluated. Example from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split digits = load_boston() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Functions fit (features, target[, sample_weight, groups]) Run the TPOT optimization process on the given training data. predict (features) Use the optimized pipeline to predict the target values for a feature set. score (testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. export (output_file_name) Export the optimized pipeline as Python code. fit(features, target, sample_weight=None, groups=None) Run the TPOT optimization process on the given training data. Uses genetic programming to optimize a machine learning pipeline that maximizes the score on the provided features and target. This pipeline optimization procedure uses internal k-fold cross-validaton to avoid overfitting on the provided data. At the end of the pipeline optimization procedure, the best pipeline is then trained on the entire set of provided samples. Parameters: features : array-like {n_samples, n_features} Feature matrix TPOT and all scikit-learn algorithms assume that the features will be numerical and there will be no missing values. As such, when a feature matrix is provided to TPOT, all missing values will automatically be replaced (i.e., imputed) using median value imputation . If you wish to use a different imputation strategy than median imputation, please make sure to apply imputation to your feature set prior to passing it to TPOT. target : array-like {n_samples} List of target labels for prediction sample_weight : array-like {n_samples}, optional Per-sample weights. Higher weights indicate more importance. If specified, sample_weight will be passed to any pipeline element whose fit() function accepts a sample_weight argument. By default, using sample_weight does not affect tpot's scoring functions, which determine preferences between pipelines. groups : array-like, with shape {n_samples, }, optional Group labels for the samples used when performing cross-validation. This parameter should only be used in conjunction with sklearn's Group cross-validation functions, such as sklearn.model_selection.GroupKFold . 
Returns: self : object Returns a copy of the fitted TPOT object predict(features) Use the optimized pipeline to predict the target values for a feature set. Parameters: features : array-like {n_samples, n_features} Feature matrix Returns: predictions : array-like {n_samples} Predicted target values for the samples in the feature matrix score(testing_features, testing_target) Returns the optimized pipeline's score on the given testing data using the user-specified scoring function. The default scoring function for TPOTClassifier is 'mean_squared_error'. Parameters: testing_features : array-like {n_samples, n_features} Feature matrix of the testing set testing_target : array-like {n_samples} List of target labels for prediction in the testing set Returns: accuracy_score : float The estimated test set accuracy according to the user-specified scoring function. export(output_file_name) Export the optimized pipeline as Python code. See the usage documentation for example usage of the export function. Parameters: output_file_name : string String containing the path and file name of the desired output file Returns: Does not return anything","title":"Regression"},{"location":"citing/","text":"If you use TPOT in a scientific publication, please consider citing at least one of the following papers: Randal S. Olson, Ryan J. Urbanowicz, Peter C. Andrews, Nicole A. Lavender, La Creis Kidd, and Jason H. Moore (2016). Automating biomedical data science through tree-based pipeline optimization . Applications of Evolutionary Computation , pages 123-137. BibTeX entry: @inbook{Olson2016EvoBio, author={Olson, Randal S. and Urbanowicz, Ryan J. and Andrews, Peter C. and Lavender, Nicole A. and Kidd, La Creis and Moore, Jason H.}, editor={Squillero, Giovanni and Burelli, Paolo}, chapter={Automating Biomedical Data Science Through Tree-Based Pipeline Optimization}, title={Applications of Evolutionary Computation: 19th European Conference, EvoApplications 2016, Porto, Portugal, March 30 -- April 1, 2016, Proceedings, Part I}, year={2016}, publisher={Springer International Publishing}, pages={123--137}, isbn={978-3-319-31204-0}, doi={10.1007/978-3-319-31204-0_9}, url={http://dx.doi.org/10.1007/978-3-319-31204-0_9} } Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science Randal S. Olson, Nathan Bartley, Ryan J. Urbanowicz, and Jason H. Moore (2016). Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science . Proceedings of GECCO 2016 , pages 485-492. BibTeX entry: @inproceedings{OlsonGECCO2016, author = {Olson, Randal S. and Bartley, Nathan and Urbanowicz, Ryan J. and Moore, Jason H.}, title = {Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science}, booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference 2016}, series = {GECCO '16}, year = {2016}, isbn = {978-1-4503-4206-3}, location = {Denver, Colorado, USA}, pages = {485--492}, numpages = {8}, url = {http://doi.acm.org/10.1145/2908812.2908918}, doi = {10.1145/2908812.2908918}, acmid = {2908918}, publisher = {ACM}, address = {New York, NY, USA}, } Alternatively, you can cite the repository directly with the following DOI:","title":"Citing"},{"location":"contributing/","text":"We welcome you to check the existing issues for bugs or enhancements to work on. If you have an idea for an extension to TPOT, please file a new issue so we can discuss it. 
Project layout The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch. How to contribute The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is set up correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running a script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check that your changes haven't broken any existing tests and that any new tests you've added pass, run the following (note: you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check that all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.) Before submitting your pull request Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes.
Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh After submitting your pull request After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"Contributing"},{"location":"contributing/#project-layout","text":"The latest stable release of TPOT is on the master branch , whereas the latest version of TPOT in development is on the development branch . Make sure you are looking at and working on the correct branch if you're looking to contribute code. In terms of directory structure: All of TPOT's code sources are in the tpot directory The documentation sources are in the docs_sources directory Images in the documentation are in the images directory Tutorials for TPOT are in the tutorials directory Unit tests for TPOT are in the tests.py file Make sure to familiarize yourself with the project layout before making any major contributions, and especially make sure to send all code changes to the development branch.","title":"Project layout"},{"location":"contributing/#how-to-contribute","text":"The preferred way to contribute to TPOT is to fork the main repository on GitHub: Fork the project repository : click on the 'Fork' button near the top of the page. This creates a copy of the code under your account on the GitHub server. Clone this copy to your local disk: $ git clone git@github.com:YourUsername/tpot.git $ cd tpot Create a branch to hold your changes: $ git checkout -b my-contribution Make sure your local environment is setup correctly for development. Installation instructions are almost identical to the user instructions except that TPOT should not be installed. If you have TPOT installed on your computer then make sure you are using a virtual environment that does not have TPOT installed. Furthermore, you should make sure you have installed the nose package into your development environment so that you can test changes locally. $ conda install nose Start making changes on your newly created branch, remembering to never work on the master branch! Work on this copy on your computer using Git to do the version control. 
Once some changes are saved locally, you can use your tweaked version of TPOT by navigating to the project's base directory and running TPOT directly from the command line: $ python -m tpot.driver or by running script that imports and uses the TPOT module with code similar to from tpot import TPOTClassifier To check your changes haven't broken any existing tests and to check new tests you've added pass run the following (note, you must have the nose package installed within your dev environment for this to work): $ nosetests -s -v When you're done editing and local testing, run: $ git add modified_files $ git commit to record your changes in Git, then push them to GitHub with: $ git push -u origin my-contribution Finally, go to the web page of your fork of the TPOT repo, and click 'Pull Request' (PR) to send your changes to the maintainers for review. Make sure that you send your PR to the development branch, as the master branch is reserved for the latest stable release. This will start the CI server to check all the project's unit tests run and send an email to the maintainers. (If any of the above seems like magic to you, then look up the Git documentation on the web.)","title":"How to contribute"},{"location":"contributing/#before-submitting-your-pull-request","text":"Before you submit a pull request for your contribution, please work through this checklist to make sure that you have done everything necessary so we can efficiently review and accept your changes. If your contribution changes TPOT in any way: Update the documentation so all of your changes are reflected there. Update the README if anything there has changed. If your contribution involves any code changes: Update the project unit tests to test your code changes. Make sure that your code is properly commented with docstrings and comments explaining your rationale behind non-obvious coding practices. If your code affected any of the pipeline operators, make sure that the corresponding export functionality reflects those changes. If your contribution requires a new library dependency: Double-check that the new dependency is easy to install via pip or Anaconda and supports both Python 2 and 3. If the dependency requires a complicated installation, then we most likely won't merge your changes because we want to keep TPOT easy to install. Add the required version of the library to .travis.yml Add a line to pip install the library to .travis_install.sh Add a line to print the version of the library to .travis_install.sh Similarly add a line to print the version of the library to .travis_test.sh","title":"Before submitting your pull request"},{"location":"contributing/#after-submitting-your-pull-request","text":"After submitting your pull request, Travis-CI will automatically run unit tests on your changes and make sure that your updated code builds and runs on Python 2 and 3. We also use services that automatically check code quality and test coverage. Check back shortly after submitting your pull request to make sure that your code passes these checks. If any of the checks come back with a red X, then do your best to address the errors.","title":"After submitting your pull request"},{"location":"examples/","text":"Overview The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks. 
Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here. Iris flower classification The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) MNIST digit recognition Below is a minimal working example with the practice MNIST dataset, which is an image classification problem .
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Boston housing prices modeling The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves a mean squared error (MSE) of about 10 or lower on the test set: import numpy as np import pandas as pd from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features) Titanic survival analysis To see TPOT applied to the Titanic Kaggle dataset, see the Jupyter notebook here . This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT. Portuguese Bank Marketing The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here . MAGIC Gamma Telescope The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Examples"},{"location":"examples/#overview","text":"The following sections illustrate the usage of TPOT with various datasets, each belonging to a typical class of machine learning tasks.
Dataset Task Task class Dataset description Jupyter notebook Iris flower classification classification link link MNIST digit recognition (image) classification link link Boston housing prices modeling regression link N/A Titanic survival analysis classification link link Bank Marketing subscription prediction classification link link MAGIC Gamma Telescope event detection classification link link Notes: - For details on how the fit() , score() and export() methods work, refer to the usage documentation . - Upon re-running the experiments, your resulting pipelines may differ (to some extent) from the ones demonstrated here.","title":"Overview"},{"location":"examples/#iris-flower-classification","text":"The following code illustrates how TPOT can be employed for performing a simple classification task over the Iris dataset. from tpot import TPOTClassifier from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import numpy as np iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data.astype(np.float64), iris.target.astype(np.float64), train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_iris_pipeline.py') Running this code should discover a pipeline (exported as tpot_iris_pipeline.py ) that achieves about 97% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = make_pipeline( Normalizer(), GaussianNB() ) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Iris flower classification"},{"location":"examples/#mnist-digit-recognition","text":"Below is a minimal working example with the practice MNIST dataset, which is an image classification problem . 
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Running this code should discover a pipeline (exported as tpot_digits_pipeline.py ) that achieves about 98% test accuracy: import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=6, weights=\"distance\") exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"MNIST digit recognition"},{"location":"examples/#boston-housing-prices-modeling","text":"The following code illustrates how TPOT can be employed for performing a regression task over the Boston housing prices dataset. from tpot import TPOTRegressor from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split housing = load_boston() X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, train_size=0.75, test_size=0.25) tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_boston_pipeline.py') Running this code should discover a pipeline (exported as tpot_boston_pipeline.py ) that achieves at least 10 mean squared error (MSE) on the test set: import numpy as np from sklearn.ensemble import GradientBoostingRegressor from sklearn.model_selection import train_test_split # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['class'], random_state=None) exported_pipeline = GradientBoostingRegressor(alpha=0.85, learning_rate=0.1, loss=\"ls\", max_features=0.9, min_samples_leaf=5, min_samples_split=6) exported_pipeline.fit(training_features, training_target) results = exported_pipeline.predict(testing_features)","title":"Boston housing prices modeling"},{"location":"examples/#titanic-survival-analysis","text":"To see the TPOT applied the Titanic Kaggle dataset, see the Jupyter notebook here . 
This example shows how to take a messy dataset and preprocess it such that it can be used in scikit-learn and TPOT.","title":"Titanic survival analysis"},{"location":"examples/#portuguese-bank-marketing","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"Portuguese Bank Marketing"},{"location":"examples/#magic-gamma-telescope","text":"The corresponding Jupyter notebook, containing the associated data preprocessing and analysis, can be found here .","title":"MAGIC Gamma Telescope"},{"location":"installing/","text":"TPOT is built on top of several existing Python libraries, including: NumPy SciPy scikit-learn DEAP update_checker tqdm stopit pandas joblib Most of the necessary Python packages can be installed via the Anaconda Python distribution , which we strongly recommend that you use. We also strongly recommend that you use Python 3 over Python 2 if you're given the choice. NumPy, SciPy, scikit-learn, pandas and joblib can be installed in Anaconda via the command: conda install numpy scipy scikit-learn pandas joblib DEAP, update_checker, tqdm and stopit can be installed with pip via the command: pip install deap update_checker tqdm stopit For Windows users , the pywin32 module is required if Python is NOT installed via the Anaconda Python distribution; it can be installed with pip for Python version <=3.3 or with conda (e.g. miniconda) for any Python version: conda install pywin32 Optionally , you can install XGBoost if you would like TPOT to use the eXtreme Gradient Boosting models. XGBoost is entirely optional, and TPOT will still function normally without XGBoost if you do not have it installed. Windows users: pip installation may not work on some Windows environments, and it may cause unexpected errors. pip install xgboost If you have issues installing XGBoost, check the XGBoost installation documentation . If you plan to use Dask for parallel training, make sure to install dask[delayed] and dask-ml . pip install dask[delayed] dask-ml If you plan to use the TPOT-MDR configuration , make sure to install scikit-mdr and scikit-rebate : pip install scikit-mdr skrebate Finally, to install TPOT itself, run the following command: pip install tpot Please file a new issue if you run into installation problems.","title":"Installation"},{"location":"related/","text":"Other Automated Machine Learning (AutoML) tools and related projects: Name Language License Description Auto-WEKA Java GPL-v3 Automated model selection and hyper-parameter tuning for Weka models. auto-sklearn Python BSD-3-Clause An automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator. auto_ml Python MIT Automated machine learning for analytics & production. Supports manual feature type declarations. H2O AutoML Java with Python, Scala & R APIs and web GUI Apache 2.0 Automated: data prep, hyperparameter tuning, random grid search and stacked ensembles in a distributed ML platform. devol Python MIT Automated deep neural network design via genetic programming. MLBox Python BSD-3-Clause Accurate hyper-parameter optimization in high-dimensional space with support for distributed computing. Recipe C GPL-v3 Machine-learning pipeline optimization through genetic programming. Uses grammars to define pipeline structure. Xcessiv Python Apache 2.0 A web-based application for quick, scalable, and automated hyper-parameter tuning and stacked ensembling in Python.
GAMA Python Apache 2.0 Machine-learning pipeline optimization through asynchronous evaluation based genetic programming.","title":"Related"},{"location":"releases/","text":"Version 0.9 TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. Refined the TPOT pipeline mutation operator. Version 0.8 TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code. Version 0.7 TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. 
The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance. Version 0.6 TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score. Version 0.5 Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes. Version 0.4 In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. 
Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions Version 0.3 We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over. Version 0.2 TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline. Version 0.1 First public release of TPOT. Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Release Notes"},{"location":"releases/#version-09","text":"TPOT now supports sparse matrices with a new built-in TPOT configuration, \"TPOT sparse\". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. We have added an \"early stopping\" option for stopping the optimization process if no improvement is made within a set number of generations. Look up the early_stop parameter to access this functionality. TPOT now reduces the number of duplicated pipelines between generations, which saves you time during the optimization process. TPOT now supports custom scoring functions via the command-line mode. We have added a new optional argument, periodic_checkpoint_folder , that allows TPOT to periodically save the best pipeline so far to a local folder during optimization process. TPOT no longer uses sklearn.externals.joblib when n_jobs=1 to avoid the potential freezing issue that scikit-learn suffers from . We have added pandas as a dependency to read input datasets instead of numpy.recfromcsv . NumPy's recfromcsv function is unable to parse datasets with complex data types. Fixed a bug that DEFAULT in the parameter(s) of nested estimator raises KeyError when exporting pipelines. Fixed a bug related to setting random_state in nested estimators. The issue would happen with pipeline with SelectFromModel ( ExtraTreesClassifier as nested estimator) or StackingEstimator if nested estimator has random_state parameter. Fixed a bug in the missing value imputation function in TPOT to impute along columns instead rows. Refined input checking for sparse matrices in TPOT. 
Refined the TPOT pipeline mutation operator.","title":"Version 0.9"},{"location":"releases/#version-08","text":"TPOT now detects whether there are missing values in your dataset and replaces them with the median value of the column. TPOT now allows you to set a group parameter in the fit function so you can use the GroupKFold cross-validation strategy. TPOT now allows you to set a subsample ratio of the training instance with the subsample parameter. For example, setting subsample =0.5 tells TPOT to create a fixed subsample of half of the training data for the pipeline optimization process. This parameter can be useful for speeding up the pipeline optimization process, but may give less accurate performance estimates from cross-validation. TPOT now has more built-in configurations , including TPOT MDR and TPOT light, for both classification and regression problems. TPOTClassifier and TPOTRegressor now expose three useful internal attributes, fitted_pipeline_ , pareto_front_fitted_pipelines_ , and evaluated_individuals_ . These attributes are described in the API documentation . Oh, TPOT now has thorough API documentation . Check it out! Fixed a reproducibility issue where setting random_seed didn't necessarily result in the same results every time. This bug was present since TPOT v0.7. Refined input checking in TPOT. Removed Python 2 uncompliant code.","title":"Version 0.8"},{"location":"releases/#version-07","text":"TPOT now has multiprocessing support. TPOT allows you to use multiple processes in parallel to accelerate the pipeline optimization process in TPOT with the n_jobs parameter. TPOT now allows you to customize the operators and parameters considered during the optimization process , which can be accomplished with the new config_dict parameter. The format of this customized dictionary can be found in the online documentation , along with a list of built-in configurations . TPOT now allows you to specify a time limit for evaluating a single pipeline (default limit is 5 minutes) in optimization process with the max_eval_time_mins parameter, so TPOT won't spend hours evaluating overly-complex pipelines. We tweaked TPOT's underlying evolutionary optimization algorithm to work even better, including using the mu+lambda algorithm . This algorithm gives you more control of how many pipelines are generated every iteration with the offspring_size parameter. Refined the default operators and parameters in TPOT, so TPOT 0.7 should work even better than 0.6. TPOT now supports sample weights in the fitness function if some if your samples are more important to classify correctly than others. The sample weights option works the same as in scikit-learn, e.g., tpot.fit(x_train, y_train, sample_weights=sample_weights) . The default scoring metric in TPOT has been changed from balanced accuracy to accuracy, the same default metric for classification algorithms in scikit-learn. Balanced accuracy can still be used by setting scoring='balanced_accuracy' when creating a TPOT instance.","title":"Version 0.7"},{"location":"releases/#version-06","text":"TPOT now supports regression problems! We have created two separate TPOTClassifier and TPOTRegressor classes to support classification and regression problems, respectively. The command-line interface also supports this feature through the -mode parameter. TPOT now allows you to specify a time limit for the optimization process with the max_time_mins parameter, so you don't need to guess how long TPOT will take any more to recommend a pipeline to you. 
Added a new operator that performs feature selection using ExtraTrees feature importance scores. XGBoost has been added as an optional dependency to TPOT. If you have XGBoost installed, TPOT will automatically detect your installation and use the XGBoostClassifier and XGBoostRegressor in its pipelines. TPOT now offers a verbosity level of 3 (\"science mode\"), which outputs the entire Pareto front instead of only the current best score. This feature may be useful for users looking to make a trade-off between pipeline complexity and score.","title":"Version 0.6"},{"location":"releases/#version-05","text":"Major refactor: Each operator is defined in a separate class file. Hooray for easier-to-maintain code! TPOT now exports directly to scikit-learn Pipelines instead of hacky code. Internal representation of individuals now uses scikit-learn pipelines. Parameters for each operator have been optimized so TPOT spends less time exploring useless parameters. We have removed pandas as a dependency and instead use numpy matrices to store the data. TPOT now uses k-fold cross-validation when evaluating pipelines, with a default k = 3. This k parameter can be tuned when creating a new TPOT instance. Improved scoring function support : Even though TPOT uses balanced accuracy by default, you can now have TPOT use any of the scoring functions that cross_val_score supports. Added the scikit-learn Normalizer preprocessor. Minor text fixes.","title":"Version 0.5"},{"location":"releases/#version-04","text":"In TPOT 0.4, we've made some major changes to the internals of TPOT and added some convenience functions. We've summarized the changes below. Added new sklearn models and preprocessors AdaBoostClassifier BernoulliNB ExtraTreesClassifier GaussianNB MultinomialNB LinearSVC PassiveAggressiveClassifier GradientBoostingClassifier RBFSampler FastICA FeatureAgglomeration Nystroem Added operator that inserts virtual features for the count of features with values of zero Reworked parameterization of TPOT operators Reduced parameter search space with information from a scikit-learn benchmark TPOT no longer generates arbitrary parameter values, but uses a fixed parameter set instead Removed XGBoost as a dependency Too many users were having install issues with XGBoost Replaced with scikit-learn's GradientBoostingClassifier Improved descriptiveness of TPOT command line parameter documentation Removed min/max/avg details during fit() when verbosity > 1 Replaced with tqdm progress bar Added tqdm as a dependency Added fit_predict() convenience function Added get_params() function so TPOT can operate in scikit-learn's cross_val_score & related functions","title":"Version 0.4"},{"location":"releases/#version-03","text":"We revised the internal optimization process of TPOT to make it more efficient, in particular in regards to the model parameters that TPOT optimizes over.","title":"Version 0.3"},{"location":"releases/#version-02","text":"TPOT now has the ability to export the optimized pipelines to sklearn code. Logistic regression, SVM, and k-nearest neighbors classifiers were added as pipeline operators. Previously, TPOT only included decision tree and random forest classifiers. TPOT can now use arbitrary scoring functions for the optimization process. TPOT now performs multi-objective Pareto optimization to balance model complexity (i.e., # of pipeline operators) and the score of the pipeline.","title":"Version 0.2"},{"location":"releases/#version-01","text":"First public release of TPOT. 
Optimizes pipelines with decision trees and random forest classifiers as the model, and uses a handful of feature preprocessors.","title":"Version 0.1"},{"location":"support/","text":"TPOT was developed in the Computational Genetics Lab at the University of Pennsylvania with funding from the NIH under grant R01 AI117694. We are incredibly grateful for the support of the NIH and the University of Pennsylvania during the development of this project. The TPOT logo was designed by Todd Newmuis, who generously donated his time to the project.","title":"Support"},{"location":"using/","text":"What to expect from AutoML software Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT. AutoML algorithms aren't intended to run for only a few minutes Of course, you can run TPOT for only a few minutes and it will find a reasonably good pipeline for your dataset. However, if you don't run TPOT for long enough, it may not find the best possible pipeline for your dataset. It may even not find any suitable pipeline at all, in which case a RuntimeError('A pipeline has not yet been optimized. Please call fit() first.') will be raised. Often it is worthwhile to run multiple instances of TPOT in parallel for a long time (hours to days) to allow TPOT to thoroughly search the pipeline space for your dataset. AutoML algorithms can take a long time to finish their search AutoML algorithms aren't as simple as fitting one model on the dataset; they are considering multiple machine learning algorithms (random forests, linear models, SVMs, etc.) in a pipeline with multiple preprocessing steps (missing value imputation, scaling, PCA, feature selection, etc.), the hyperparameters for all of the models and preprocessing steps, as well as multiple ways to ensemble or stack the algorithms within the pipeline. As such, TPOT will take a while to run on larger datasets, but it's important to realize why. With the default TPOT settings (100 generations with 100 population size), TPOT will evaluate 10,000 pipeline configurations before finishing. To put this number into context, think about a grid search of 10,000 hyperparameter combinations for a machine learning algorithm and how long that grid search will take. That is 10,000 model configurations to evaluate with 10-fold cross-validation, which means that roughly 100,000 models are fit and evaluated on the training data in one grid search. That's a time-consuming procedure, even for simpler models like decision trees. Typical TPOT runs will take hours to days to finish (unless it's a small dataset), but you can always interrupt the run partway through and see the best results so far. TPOT also provides a warm_start parameter that lets you restart a TPOT run from where it left off. AutoML algorithms can recommend different solutions for the same dataset If you're working with a reasonably complex dataset or run TPOT for a short amount of time, different TPOT runs may result in different pipeline recommendations. TPOT's optimization algorithm is stochastic in nature, which means that it uses randomness (in part) to search the possible pipeline space. When two TPOT runs recommend different pipelines, this means that the TPOT runs didn't converge due to lack of time or that multiple pipelines perform more-or-less the same on your dataset. 
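As a concrete aside (a minimal sketch, not taken verbatim from the TPOT documentation; the dataset and parameter values are only illustrative), the random_state and warm_start parameters are how you control this behaviour in code: fixing random_state makes a single run reproducible, while warm_start lets a later call to fit() continue from the population of the previous call instead of restarting the search from scratch.
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25, random_state=42)

# Fixing random_state makes this run repeatable; leaving it unset is why repeated runs can recommend different pipelines.
tpot = TPOTClassifier(generations=2, population_size=20, random_state=42, warm_start=True, verbosity=2)
tpot.fit(X_train, y_train)  # a first, short run

# Because warm_start=True, this second call continues the search from the existing population rather than restarting it.
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
Either way, with the same seed and data you should get the same pipeline back, and without a seed you should expect some run-to-run variation in the recommended pipeline.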
This is actually an advantage over fixed grid search techniques: TPOT is meant to be an assistant that gives you ideas on how to solve a particular machine learning problem by exploring pipeline configurations that you might have never considered, then leaves the fine-tuning to more constrained parameter tuning techniques such as grid search. TPOT with code We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets. TPOT on the command line To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. 
-o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. 
Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. 
--no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit. Scoring functions TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module. Built-in TPOT configurations TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. 
To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Customizing TPOT's operators and parameters Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. 
For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers. Template option in TPOT The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of how to use the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for a TPOTClassifier 's template and Regressor for a TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes those subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for use in the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'. FeatureSetSelector in TPOT FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format.
In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in that set, and the 3rd column is a list of feature names (if input X is a pandas.DataFrame) or indexes (if input X is a numpy.ndarray) delimited by \";\" (a short illustrative sketch of such a file is shown further below, after the multiprocessing note). Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # each pipeline selects only one feature set; this is a list of indexes of subsets in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y) Pipeline caching in TPOT With the memory parameter, pipelines can cache the results of each transformer after fitting them. This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore. Crash/freeze issue with n_jobs > 1 under OSX or Linux Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. But it may crash/freeze with n_jobs > 1 under OSX or Linux as scikit-learn does, especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .
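Returning briefly to the FeatureSetSelector subset file described earlier in this section: below is a purely illustrative sketch of what such a csv file might look like and how it could be written with pandas. The file name, feature-set names and feature names are all made up, and the column headers (Subset, Size, Features) are an assumption modeled on the subset_test.csv file linked in the example above; check that file for the exact format TPOT expects.
import pandas as pd

# Hypothetical subset file with the three columns described earlier:
# the feature set name, the number of features in that set, and a ";"-delimited list of feature names.
subsets = pd.DataFrame({
    'Subset': ['immune_genes', 'metabolic_genes'],            # made-up feature set names
    'Size': [3, 2],                                           # number of features in each set
    'Features': ['GENE_A;GENE_B;GENE_C', 'GENE_D;GENE_E'],    # ";"-delimited feature names (or column indexes)
})
subsets.to_csv('my_feature_subsets.csv', index=False)

# This local path could then be used in place of the subset_test.csv URL shown earlier, e.g.
# 'subset_list': ['my_feature_subsets.csv'], 'sel_subset': [0, 1]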
Parallel Training with Dask For large problems, or when working in a Jupyter notebook, we highly recommend that you distribute the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10 * n_jobs if it is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend. You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('scheduler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Using TPOT"},{"location":"using/#what-to-expect-from-automl-software","text":"Automated machine learning (AutoML) takes a higher-level approach to machine learning than most practitioners are used to, so we've gathered a handful of guidelines on what to expect when running AutoML software such as TPOT.","title":"What to expect from AutoML software"},{"location":"using/#tpot-with-code","text":"We've taken care to design the TPOT interface to be as similar as possible to scikit-learn. TPOT can be imported just like any regular Python module. To import TPOT, type: from tpot import TPOTClassifier then create an instance of TPOT as follows: pipeline_optimizer = TPOTClassifier() It's also possible to use TPOT for regression problems with the TPOTRegressor class. Other than the class name, a TPOTRegressor is used the same way as a TPOTClassifier . You can read more about the TPOTClassifier and TPOTRegressor classes in the API documentation . Some example code with custom TPOT parameters might look like: pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) Now TPOT is ready to optimize a pipeline for you. You can tell TPOT to optimize a pipeline based on a data set with the fit function: pipeline_optimizer.fit(X_train, y_train) The fit function initializes the genetic programming algorithm to find the highest-scoring pipeline based on average k-fold cross-validation. Then, the pipeline is trained on the entire set of provided samples, and the TPOT instance can be used as a fitted model. You can then proceed to evaluate the final pipeline on the testing set with the score function: print(pipeline_optimizer.score(X_test, y_test)) Finally, you can tell TPOT to export the corresponding Python code for the optimized pipeline to a text file with the export function: pipeline_optimizer.export('tpot_exported_pipeline.py') Once this code finishes running, tpot_exported_pipeline.py will contain the Python code for the optimized pipeline. Below is a full example script using TPOT to optimize a pipeline, score it, and export the best pipeline to a file.
from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) pipeline_optimizer = TPOTClassifier(generations=5, population_size=20, cv=5, random_state=42, verbosity=2) pipeline_optimizer.fit(X_train, y_train) print(pipeline_optimizer.score(X_test, y_test)) pipeline_optimizer.export('tpot_exported_pipeline.py') Check our examples to see TPOT applied to some specific data sets.","title":"TPOT with code"},{"location":"using/#tpot-on-the-command-line","text":"To use TPOT via the command line, enter the following command with a path to the data file: tpot /path_to/data_file.csv An example command-line call to TPOT may look like: tpot data/mnist.csv -is , -target class -o tpot_exported_pipeline.py -g 5 -p 20 -cv 5 -s 42 -v 2 TPOT offers several arguments that can be provided at the command line. To see brief descriptions of these arguments, enter the following command: tpot --help Detailed descriptions of the command-line arguments are below. Argument Parameter Valid values Effect -is INPUT_SEPARATOR Any string Character used to separate columns in the input file. -target TARGET_NAME Any string Name of the target column in the input file. -mode TPOT_MODE ['classification', 'regression'] Whether TPOT is being used for a supervised classification or regression problem. -o OUTPUT_FILE String path to a file File to export the code for the final optimized pipeline. -g GENERATIONS Any positive integer Number of iterations to run the pipeline optimization process. Generally, TPOT will work better when you give it more generations (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -p POPULATION_SIZE Any positive integer Number of individuals to retain in the GP population every generation. Generally, TPOT will work better when you give it more individuals (and therefore time) to optimize the pipeline. TPOT will evaluate POPULATION_SIZE + GENERATIONS x OFFSPRING_SIZE pipelines in total. -os OFFSPRING_SIZE Any positive integer Number of offspring to produce in each GP generation. By default, OFFSPRING_SIZE = POPULATION_SIZE. -mr MUTATION_RATE [0.0, 1.0] GP mutation rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to apply random changes to every generation. We recommend using the default parameter unless you understand how the mutation rate affects GP algorithms. -xr CROSSOVER_RATE [0.0, 1.0] GP crossover rate in the range [0.0, 1.0]. This tells the GP algorithm how many pipelines to \"breed\" every generation. We recommend using the default parameter unless you understand how the crossover rate affects GP algorithms. -scoring SCORING_FN 'accuracy', 'adjusted_rand_score', 'average_precision', 'balanced_accuracy', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'neg_log_loss', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'r2', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'roc_auc', 'my_module.scorer_name*' Function used to evaluate the quality of a given pipeline for the problem. By default, accuracy is used for classification and mean squared error (MSE) is used for regression. 
TPOT assumes that any function with \"error\" or \"loss\" in the name is meant to be minimized, whereas any other functions will be maximized. my_module.scorer_name: You can also specify your own function or a full python path to an existing one. See the section on scoring functions for more details. -cv CV Any integer > 1 Number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. -sub SUBSAMPLE (0.0, 1.0] Subsample ratio of the training instance. Setting it to 0.5 means that TPOT randomly collects half of training samples for pipeline optimization process. -njobs NUM_JOBS Any positive integer or -1 Number of CPUs for evaluating pipelines in parallel during the TPOT optimization process. Assigning this to -1 will use as many cores as available on the computer. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. -maxtime MAX_TIME_MINS Any positive integer How many minutes TPOT has to optimize the pipeline. If provided, this setting will override the \"generations\" parameter and allow TPOT to run until it runs out of time. -maxeval MAX_EVAL_MINS Any positive float How many minutes TPOT has to evaluate a single pipeline. Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer. -s RANDOM_STATE Any positive integer Random number generator seed for reproducibility. Set this seed if you want your TPOT run to be reproducible with the same seed and data set in the future. -config CONFIG_FILE String or file path Operators and parameter configurations in TPOT: Path for configuration file: TPOT will use the path to a configuration file for customizing the operators and parameters that TPOT uses in the optimization process string 'TPOT light', TPOT will use a built-in configuration with only fast models and preprocessors string 'TPOT MDR', TPOT will use a built-in configuration specialized for genomic studies string 'TPOT sparse': TPOT will use a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. See the built-in configurations section for the list of configurations included with TPOT, and the custom configuration section for more information and examples of how to create your own TPOT configurations. -template TEMPLATE String Template of predefined pipeline structure. The option is for specifying a desired structure for the machine learning pipeline evaluated in TPOT. So far this option only supports linear pipeline structure. Each step in the pipeline should be a main class of operators (Selector, Transformer, Classifier or Regressor) or a specific operator (e.g. `SelectPercentile`) defined in TPOT operator configuration. If one step is a main class, TPOT will randomly assign all subclass operators (subclasses of [`SelectorMixin`](https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_selection/base.py#L17), [`TransformerMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html), [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html) or [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html) in scikit-learn) to that step. Steps in the template are delimited by \"-\", e.g. \"SelectPercentile-Transformer-Classifier\". By default value of template is None, TPOT generates tree-based pipeline randomly. 
See the template option in tpot section for more details. -memory MEMORY String or file path If supplied, pipeline will cache each transformer after calling fit. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical with another fitted pipeline during optimization process. Memory caching mode in TPOT: Path for a caching directory: TPOT uses memory caching with the provided directory and TPOT does NOT clean the caching directory up upon shutdown. string 'auto': TPOT uses memory caching with a temporary directory and cleans it up upon shutdown. -cf CHECKPOINT_FOLDER Folder path If supplied, a folder you created, in which tpot will periodically save pipelines in pareto front so far while optimizing. This is useful in multiple cases: sudden death before tpot could save an optimized pipeline progress tracking grabbing a pipeline while tpot is working Example: mkdir my_checkpoints -cf ./my_checkpoints -es EARLY_STOP Any positive integer How many generations TPOT checks whether there is no improvement in optimization process. End optimization process if there is no improvement in the set number of generations. -v VERBOSITY {0, 1, 2, 3} How much information TPOT communicates while it is running. 0 = none, 1 = minimal, 2 = high, 3 = all. A setting of 2 or higher will add a progress bar during the optimization procedure. --no-update-check Flag indicating whether the TPOT version checker should be disabled. --version Show TPOT's version number and exit. --help Show TPOT's help documentation and exit.","title":"TPOT on the command line"},{"location":"using/#scoring-functions","text":"TPOT makes use of sklearn.model_selection.cross_val_score for evaluating pipelines, and as such offers the same support for scoring functions. There are two ways to make use of scoring functions with TPOT: You can pass in a string to the scoring parameter from the list above. Any other strings will cause TPOT to throw an exception. You can pass the callable object/function with signature scorer(estimator, X, y) , where estimator is trained estimator to use for scoring, X are features that will be passed to estimator.predict and y are target values for X . To do this, you should implement your own function. See the example below for further explanation. from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) # Make a custom metric function def my_custom_accuracy(y_true, y_pred): return float(sum(y_pred == y_true)) / len(y_true) # Make a custom a scorer from the custom metric function # Note: greater_is_better=False in make_scorer below would mean that the scoring function should be minimized. my_custom_scorer = make_scorer(my_custom_accuracy, greater_is_better=True) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, scoring=my_custom_scorer) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') You can pass a metric function with the signature score_func(y_true, y_pred) (e.g. my_custom_accuracy in the example above), where y_true are the true target values and y_pred are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. 
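To make the scorer(estimator, X, y) form described above concrete, here is a minimal sketch; the function name and the choice of balanced accuracy are our own illustrative assumptions, not part of TPOT's documentation. The callable receives an already-fitted estimator plus data and returns a single float that TPOT will maximize.

from tpot import TPOTClassifier
from sklearn.metrics import balanced_accuracy_score

# A scorer with the signature scorer(estimator, X, y): predict with the
# fitted estimator and return one float score to be maximized.
def balanced_accuracy_scorer(estimator, X, y):
    y_pred = estimator.predict(X)
    return balanced_accuracy_score(y, y_pred)

tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2,
                      scoring=balanced_accuracy_scorer)
# tpot.fit(X_train, y_train) would then optimize pipelines against this scorer.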
TPOT assumes that any function with \"error\" or \"loss\" in the function name is meant to be minimized ( greater_is_better=False in make_scorer ), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. my_module.scorer_name : You can also use a custom score_func(y_true, y_pred) or scorer(estimator, X, y) function through the command line by adding the argument -scoring my_module.scorer to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: -scoring sklearn.metrics.auc will use the function auc from sklearn.metrics module.","title":"Scoring functions"},{"location":"using/#built-in-tpot-configurations","text":"TPOT comes with a handful of default operators and parameter configurations that we believe work well for optimizing machine learning pipelines. Below is a list of the current built-in configurations that come with TPOT. Configuration Name Description Operators Default TPOT TPOT will search over a broad range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Some of these operators are complex and may take a long time to run, especially on larger datasets. Note: This is the default configuration for TPOT. To use this configuration, use the default value (None) for the config_dict parameter. Classification Regression TPOT light TPOT will search over a restricted range of preprocessors, feature constructors, feature selectors, models, and parameters to find a series of operators that minimize the error of the model predictions. Only simpler and fast-running operators will be used in these pipelines, so TPOT light is useful for finding quick and simple pipelines for a classification or regression problem. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression TPOT MDR TPOT will search over a series of feature selectors and Multifactor Dimensionality Reduction models to find a series of operators that maximize prediction accuracy. The TPOT MDR configuration is specialized for genome-wide association studies (GWAS) , and is described in detail online here . Note that TPOT MDR may be slow to run because the feature selection routines are computationally expensive, especially on large datasets. Classification Regression TPOT sparse TPOT uses a configuration dictionary with a one-hot encoder and the operators normally included in TPOT that also support sparse matrices. This configuration works for both the TPOTClassifier and TPOTRegressor. Classification Regression To use any of these configurations, simply pass the string name of the configuration to the config_dict parameter (or -config on the command line). 
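On the command line, the equivalent is to pass the configuration name as a string to -config (quoted here because it contains a space). The data path and remaining options below are placeholders reused from the earlier command-line example, so treat this as a sketch rather than a prescribed invocation:

tpot data/mnist.csv -is , -target class -config 'TPOT light' -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py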
For example, to use the \"TPOT light\" configuration: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict='TPOT light') tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py')","title":"Built-in TPOT configurations"},{"location":"using/#customizing-tpots-operators-and-parameters","text":"Beyond the default configurations that come with TPOT, in some cases it is useful to limit the algorithms and parameters that TPOT considers. For that reason, we allow users to provide TPOT with a custom configuration for its operators and parameters. The custom TPOT configuration must be in nested dictionary format, where the first level key is the path and name of the operator (e.g., sklearn.naive_bayes.MultinomialNB ) and the second level key is the corresponding parameter name for that operator (e.g., fit_prior ). The second level key should point to a list of parameter values for that parameter, e.g., 'fit_prior': [True, False] . For a simple example, the configuration could be: tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } in which case TPOT would only consider pipelines containing GaussianNB , BernoulliNB , MultinomialNB , and tune those algorithm's parameters in the ranges provided. This dictionary can be passed directly within the code to the TPOTClassifier / TPOTRegressor config_dict parameter, described above. For example: from tpot import TPOTClassifier from sklearn.datasets import load_digits from sklearn.model_selection import train_test_split digits = load_digits() X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25) tpot_config = { 'sklearn.naive_bayes.GaussianNB': { }, 'sklearn.naive_bayes.BernoulliNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] }, 'sklearn.naive_bayes.MultinomialNB': { 'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.], 'fit_prior': [True, False] } } tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, config_dict=tpot_config) tpot.fit(X_train, y_train) print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') Command-line users must create a separate .py file with the custom configuration and provide the path to the file to the tpot call. For example, if the simple example configuration above is saved in tpot_classifier_config.py , that configuration could be used on the command line with the command: tpot data/mnist.csv -is , -target class -config tpot_classifier_config.py -g 5 -p 20 -v 2 -o tpot_exported_pipeline.py When using the command-line interface, the configuration file specified in the -config parameter must name its custom TPOT configuration tpot_config . Otherwise, TPOT will not be able to locate the configuration dictionary. For more detailed examples of how to customize TPOT's operator configuration, see the default configurations for classification and regression in TPOT's source code. 
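As a concrete sketch of the command-line workflow described above, the file tpot_classifier_config.py could contain nothing more than the example dictionary, bound to the required name tpot_config (the file name matches the earlier example; the contents are the same naive Bayes configuration):

# Contents of tpot_classifier_config.py; the dictionary must be named tpot_config
tpot_config = {
    'sklearn.naive_bayes.GaussianNB': {
    },
    'sklearn.naive_bayes.BernoulliNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    },
    'sklearn.naive_bayes.MultinomialNB': {
        'alpha': [1e-3, 1e-2, 1e-1, 1., 10., 100.],
        'fit_prior': [True, False]
    }
}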
Note that you must have all of the corresponding packages for the operators installed on your computer, otherwise TPOT will not be able to use them. For example, if XGBoost is not installed on your computer, then TPOT will simply not import nor use XGBoost in the pipelines it considers.","title":"Customizing TPOT's operators and parameters"},{"location":"using/#template-option-in-tpot","text":"The template option provides a way to specify a desired structure for the machine learning pipeline, which may reduce TPOT computation time and potentially provide more interpretable results. The current implementation only supports linear pipelines. Below is a simple example of how to use the template option. The pipelines generated/evaluated in TPOT will follow this structure: the 1st step is a feature selector (a subclass of SelectorMixin ), the 2nd step is a feature transformer (a subclass of TransformerMixin ) and the 3rd step is a classifier for classification (a subclass of ClassifierMixin ). The last step must be Classifier for a TPOTClassifier 's template and Regressor for a TPOTRegressor . Note: although SelectorMixin is a subclass of TransformerMixin in scikit-learn, Transformer in this option excludes subclasses of SelectorMixin . tpot_obj = TPOTClassifier( template='Selector-Transformer-Classifier' ) If a specific operator, e.g. SelectPercentile , is preferred for use in the 1st step of the pipeline, the template can be defined as 'SelectPercentile-Transformer-Classifier'.","title":"Template option in TPOT"},{"location":"using/#featuresetselector-in-tpot","text":"FeatureSetSelector is a special new operator in TPOT. This operator enables feature selection based on a priori expert knowledge. For example, in RNA-seq gene expression analysis, this operator can be used to select one or more gene (feature) set(s) based on GO (Gene Ontology) terms or annotated gene sets from the Molecular Signatures Database ( MSigDB ) in the 1st step of the pipeline via the template option above, in order to reduce dimensions and TPOT computation time. This operator requires a dataset list in csv format. In this csv file, there are only three columns: the 1st column is the feature set name, the 2nd column is the total number of features in that set and the 3rd column is a list of feature names (if input X is pandas.DataFrame) or indexes (if input X is numpy.ndarray) delimited by \";\". Below is an example of how to use this operator in TPOT. Please check our preprint paper for more details. from tpot import TPOTClassifier import numpy as np import pandas as pd from tpot.config import classifier_config_dict test_data = pd.read_csv(\"https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/tests.csv\") test_X = test_data.drop(\"class\", axis=1) test_y = test_data['class'] # add FeatureSetSelector into tpot configuration classifier_config_dict['tpot.builtins.FeatureSetSelector'] = { 'subset_list': ['https://raw.githubusercontent.com/EpistasisLab/tpot/master/tests/subset_test.csv'], 'sel_subset': [0,1] # select only one feature set; a list of candidate indexes of subsets in the list above #'sel_subset': list(combinations(range(3), 2)) # select two feature sets } tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, template='FeatureSetSelector-Transformer-Classifier', config_dict=classifier_config_dict) tpot.fit(test_X, test_y)","title":"FeatureSetSelector in TPOT"},{"location":"using/#pipeline-caching-in-tpot","text":"With the memory parameter, pipelines can cache the results of each transformer after fitting them.
This feature is used to avoid repeated computation by transformers within a pipeline if the parameters and input data are identical to another fitted pipeline during the optimization process. TPOT allows users to specify a custom directory path or joblib.Memory in case they want to re-use the memory cache in future TPOT runs (or a warm_start run). There are three methods for enabling memory caching in TPOT: from tpot import TPOTClassifier from tempfile import mkdtemp from joblib import Memory from shutil import rmtree # Method 1, auto mode: TPOT uses memory caching with a temporary directory and cleans it up upon shutdown tpot = TPOTClassifier(memory='auto') # Method 2, with a custom directory for memory caching tpot = TPOTClassifier(memory='/to/your/path') # Method 3, with a Memory object cachedir = mkdtemp() # Create a temporary folder memory = Memory(cachedir=cachedir, verbose=0) tpot = TPOTClassifier(memory=memory) # Clear the cache directory when you don't need it anymore rmtree(cachedir) Note: TPOT does NOT clean up memory caches if users set a custom directory path or Memory object. We recommend that you clean up the memory caches when you don't need them anymore.","title":"Pipeline caching in TPOT"},{"location":"using/#crashfreeze-issue-with-n_jobs-1-under-osx-or-linux","text":"Internally, TPOT uses joblib to fit estimators in parallel. This is the same parallelization framework used by scikit-learn. But it may crash/freeze with n_jobs > 1 under OSX or Linux as scikit-learn does, especially with large datasets. One solution is to configure Python's multiprocessing module to use the forkserver start method (instead of the default fork ) to manage the process pools. You can enable the forkserver mode globally for your program by putting the following code into your main script: import multiprocessing # other imports, custom code, load data, define model... if __name__ == '__main__': multiprocessing.set_start_method('forkserver') # call scikit-learn utils or tpot utils with n_jobs > 1 here More information about these start methods can be found in the multiprocessing documentation .","title":"Crash/freeze issue with n_jobs > 1 under OSX or Linux"},{"location":"using/#parallel-training-with-dask","text":"For large problems, or when working in a Jupyter notebook, we highly recommend that you distribute the work on a Dask cluster. The dask-examples binder has a runnable example with a small dask cluster. To use your Dask cluster to fit a TPOT model, specify the use_dask keyword when you create the TPOT estimator. Note: if use_dask=True , TPOT will use as many cores as available on your Dask cluster. If n_jobs is specified, then it will control the chunk size (10 * n_jobs if it is less than the offspring size) of parallel training. estimator = TPOTEstimator(use_dask=True, n_jobs=-1) This will use all the workers on your cluster to do the training, and use Dask-ML's pipeline rewriting to avoid re-fitting estimators multiple times on the same set of data. It will also provide fine-grained diagnostics in the distributed scheduler UI . Alternatively, Dask implements a joblib backend.
You can instruct TPOT to use the distributed backend during training by specifying a joblib.parallel_backend : import joblib import distributed.joblib from dask.distributed import Client # connect to the cluster client = Client('schedueler-address') # create the estimator normally estimator = TPOTClassifier(n_jobs=-1) # perform the fit in this context manager with joblib.parallel_backend(\"dask\"): estimator.fit(X, y) See dask's distributed joblib integration for more.","title":"Parallel Training with Dask"}]} \ No newline at end of file diff --git a/docs_sources/examples.md b/docs_sources/examples.md index e83a7f00..b3dcb702 100644 --- a/docs_sources/examples.md +++ b/docs_sources/examples.md @@ -47,7 +47,7 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ @@ -97,7 +97,7 @@ from sklearn.preprocessing import PolynomialFeatures from tpot.builtins import StackingEstimator from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ @@ -146,7 +146,7 @@ from sklearn.pipeline import make_pipeline from sklearn.preprocessing import PolynomialFeatures from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ diff --git a/tests/export_tests.py b/tests/export_tests.py index 59f9f27d..bb39b90f 100644 --- a/tests/export_tests.py +++ b/tests/export_tests.py @@ -73,7 +73,7 @@ def test_export_random_ind(): from sklearn.naive_bayes import BernoulliNB from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -128,7 +128,7 @@ def test_export_2(): from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -321,7 +321,7 @@ def test_export_pipeline(): from sklearn.tree import DecisionTreeClassifier from tpot.builtins import StackingEstimator -# NOTE: Make 
sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -358,7 +358,7 @@ def test_export_pipeline_2(): from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -389,7 +389,7 @@ def test_export_pipeline_3(): from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeClassifier -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -429,7 +429,7 @@ def test_export_pipeline_4(): from sklearn.preprocessing import FunctionTransformer from copy import copy -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -466,7 +466,7 @@ def test_export_pipeline_5(): from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeRegressor -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -501,7 +501,7 @@ def test_export_pipeline_6(): from sklearn.neighbors import KNeighborsClassifier from tpot.export_utils import set_param_recursive -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -596,7 +596,7 @@ def test_pipeline_score_save(): from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeClassifier -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ @@ -650,7 +650,7 @@ def test_imputer_in_export(): except ImportError: from sklearn.preprocessing import Imputer -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is 
labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ diff --git a/tpot/export_utils.py b/tpot/export_utils.py index 4cf8db47..51a37305 100644 --- a/tpot/export_utils.py +++ b/tpot/export_utils.py @@ -96,7 +96,7 @@ def export_pipeline(exported_pipeline, data_file_path = 'PATH/TO/DATA/FILE' pipeline_text += """ -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('{}', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \\ diff --git a/tutorials/Digits.ipynb b/tutorials/Digits.ipynb index ff4992a5..5821c3e3 100644 --- a/tutorials/Digits.ipynb +++ b/tutorials/Digits.ipynb @@ -213,15 +213,15 @@ "source": [ "# %load tpot_digits_pipeline.py\n", "import numpy as np\n", - "\n", + "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.neighbors import KNeighborsClassifier\n", "\n", - "# NOTE: Make sure that the class is labeled 'class' in the data file\n", - "tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)\n", - "features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)\n", + "# NOTE: Make sure that the outcome column is labeled 'target' in the data file\n", + "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", + "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_classes, testing_classes = \\\n", - " train_test_split(features, tpot_data['class'], random_state=None)\n", + " train_test_split(features, tpot_data['target'], random_state=None)\n", "\n", "exported_pipeline = KNeighborsClassifier(n_neighbors=4, p=2, weights=\"distance\")\n", "\n", diff --git a/tutorials/IRIS.ipynb b/tutorials/IRIS.ipynb index 8b1a88cf..9ce9729a 100644 --- a/tutorials/IRIS.ipynb +++ b/tutorials/IRIS.ipynb @@ -403,17 +403,17 @@ "source": [ "# %load tpot_iris_pipeline.py\n", "import numpy as np\n", - "\n", + "import pandas as pd\n", "from sklearn.kernel_approximation import RBFSampler\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", - "# NOTE: Make sure that the class is labeled 'class' in the data file\n", - "tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)\n", - "features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)\n", + "# NOTE: Make sure that the outcome column is labeled 'target' in the data file\n", + "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", + "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_classes, testing_classes = \\\n", - " train_test_split(features, tpot_data['class'], random_state=None)\n", + " train_test_split(features, tpot_data['target'], random_state=None)\n", "\n", "exported_pipeline = make_pipeline(\n", " RBFSampler(gamma=0.8500000000000001),\n", diff --git a/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb b/tutorials/MAGIC 
Gamma Telescope/MAGIC Gamma Telescope.ipynb index 4eb400af..4d857f1a 100644 --- a/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb +++ b/tutorials/MAGIC Gamma Telescope/MAGIC Gamma Telescope.ipynb @@ -932,7 +932,7 @@ "from sklearn.tree import DecisionTreeClassifier\n", "from tpot.builtins import StackingEstimator\n", "\n", - "# NOTE: Make sure that the class is labeled 'target' in the data file\n", + "# NOTE: Make sure that the outcome column is labeled 'target' in the data file\n", "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_target, testing_target = \\\n", diff --git a/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py b/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py index 388f04e3..208553c0 100644 --- a/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py +++ b/tutorials/MAGIC Gamma Telescope/tpot_MAGIC_Gamma_Telescope_pipeline.py @@ -6,7 +6,7 @@ from sklearn.tree import DecisionTreeClassifier from tpot.builtins import StackingEstimator -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ diff --git a/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb b/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb index cd4c9713..b2a5f1d8 100644 --- a/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb +++ b/tutorials/Portuguese Bank Marketing/Portuguese Bank Marketing Strategy.ipynb @@ -923,7 +923,7 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", - "# NOTE: Make sure that the class is labeled 'target' in the data file\n", + "# NOTE: Make sure that the outcome column is labeled 'target' in the data file\n", "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_target, testing_target = \\\n", diff --git a/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py b/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py index 5e737569..8f8bed8c 100644 --- a/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py +++ b/tutorials/Portuguese Bank Marketing/tpot_marketing_pipeline.py @@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier -# NOTE: Make sure that the class is labeled 'target' in the data file +# NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) features = tpot_data.drop('target', axis=1) training_features, testing_features, training_target, testing_target = \ diff --git a/tutorials/Titanic_Kaggle.ipynb b/tutorials/Titanic_Kaggle.ipynb index 7b3c29f7..acc690f9 100644 --- a/tutorials/Titanic_Kaggle.ipynb +++ b/tutorials/Titanic_Kaggle.ipynb @@ -1065,15 +1065,15 @@ "source": [ "# %load tpot_titanic_pipeline.py\n", "import numpy as np\n", - "\n", + "import pandas as pd\n", "from sklearn.ensemble import 
RandomForestClassifier\n", "from sklearn.model_selection import train_test_split\n", "\n", - "# NOTE: Make sure that the class is labeled 'class' in the data file\n", - "tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)\n", - "features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)\n", + "# NOTE: Make sure that the outcome column is labeled 'target' in the data file\n", + "tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)\n", + "features = tpot_data.drop('target', axis=1)\n", "training_features, testing_features, training_classes, testing_classes = \\\n", - " train_test_split(features, tpot_data['class'], random_state=None)\n", + " train_test_split(features, tpot_data['target'], random_state=None)\n", "\n", "exported_pipeline = RandomForestClassifier(bootstrap=False, max_features=0.4, min_samples_leaf=1, min_samples_split=9)\n", "\n", diff --git a/tutorials/tpot_iris_pipeline.py b/tutorials/tpot_iris_pipeline.py index c85875f2..2ded0bd5 100644 --- a/tutorials/tpot_iris_pipeline.py +++ b/tutorials/tpot_iris_pipeline.py @@ -1,15 +1,15 @@ import numpy as np - +import pandas as pd from sklearn.kernel_approximation import RBFSampler from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeClassifier -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_classes, testing_classes = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = make_pipeline( RBFSampler(gamma=0.8500000000000001), diff --git a/tutorials/tpot_mnist_pipeline.py b/tutorials/tpot_mnist_pipeline.py index afa65109..1659c833 100644 --- a/tutorials/tpot_mnist_pipeline.py +++ b/tutorials/tpot_mnist_pipeline.py @@ -1,13 +1,13 @@ import numpy as np - +import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_classes, testing_classes = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = KNeighborsClassifier(n_neighbors=4, p=2, weights="distance") diff --git a/tutorials/tpot_titanic_pipeline.py b/tutorials/tpot_titanic_pipeline.py index 1a0fa8d0..81f3e89c 100644 --- a/tutorials/tpot_titanic_pipeline.py +++ 
b/tutorials/tpot_titanic_pipeline.py @@ -1,13 +1,13 @@ import numpy as np - +import pandas as pd from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import train_test_split -# NOTE: Make sure that the class is labeled 'class' in the data file -tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64) -features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1) +# NOTE: Make sure that the outcome column is labeled 'target' in the data file +tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) +features = tpot_data.drop('target', axis=1) training_features, testing_features, training_classes, testing_classes = \ - train_test_split(features, tpot_data['class'], random_state=None) + train_test_split(features, tpot_data['target'], random_state=None) exported_pipeline = RandomForestClassifier(bootstrap=False, max_features=0.4, min_samples_leaf=1, min_samples_split=9) From 5bbd944ea22ec7735139b4899fe19a53e4650066 Mon Sep 17 00:00:00 2001 From: weixuanfuDate: Tue, 5 Nov 2019 10:27:36 -0500 Subject: [PATCH 28/44] fix the bug that warm_start is not working when max_time_mins is not default #946 --- tests/tpot_tests.py | 25 ++++++++++++++++++++++++- tpot/base.py | 9 +++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index 0088833c..c2bc18cd 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -964,12 +964,35 @@ def test_fit_4(): tpot_obj.generations == 20 tpot_obj.fit(training_features, training_target) - + assert tpot_obj._pop == [] assert isinstance(tpot_obj._optimized_pipeline, creator.Individual) assert not (tpot_obj._start_datetime is None) def test_fit_5(): + """Assert that the TPOT fit function provides an optimized pipeline with max_time_mins of 2 second.""" + tpot_obj = TPOTClassifier( + random_state=42, + population_size=2, + generations=None, + verbosity=0, + max_time_mins=2/60., + config_dict='TPOT light', + warm_start=True + ) + tpot_obj._fit_init() + assert tpot_obj.generations == 1000000 + + # reset generations to 20 just in case that the failed test may take too much time + tpot_obj.generations == 20 + + tpot_obj.fit(training_features, training_target) + assert tpot_obj._pop != [] + assert isinstance(tpot_obj._optimized_pipeline, creator.Individual) + assert not (tpot_obj._start_datetime is None) + + +def test_fit_6(): """Assert that the TPOT fit function provides an optimized pipeline with pandas DataFrame""" tpot_obj = TPOTClassifier( random_state=42, diff --git a/tpot/base.py b/tpot/base.py index 7bfc56e5..efe1b6be 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -752,10 +752,6 @@ def pareto_eq(ind1, ind2): per_generation_function=self._check_periodic_pipeline ) - # store population for the next call - if self.warm_start: - self._pop = pop - # Allow for certain exceptions to signal a premature fit() cancellation except (KeyboardInterrupt, SystemExit, StopIteration) as e: if self.verbosity > 0: @@ -763,6 +759,9 @@ def pareto_eq(ind1, ind2): self._pbar.write('{}\nTPOT closed prematurely. 
Will use the current best pipeline.'.format(e), file=self._file) finally: + # clean population for the next call if warm_start=False + if not self.warm_start: + self._pop = [] # keep trying 10 times in case weird things happened like multiple CTRL+C or exceptions attempts = 10 for attempt in range(attempts): @@ -1383,6 +1382,8 @@ def _evaluate_individuals(self, population, features, target, sample_weight=None ind.fitness.values = (5000.,-float('inf')) self._pareto_front.update(population) + print(111) + self._pop = population raise KeyboardInterrupt self._update_evaluated_individuals_(result_score_list, eval_individuals_str, operator_counts, stats_dicts) From 0def2d34ebffce9dd2d3d79e3fe6ba2d3b97184f Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 11:17:06 -0500 Subject: [PATCH 29/44] clean codes --- tpot/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpot/base.py b/tpot/base.py index efe1b6be..9b003ce7 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -1382,7 +1382,7 @@ def _evaluate_individuals(self, population, features, target, sample_weight=None ind.fitness.values = (5000.,-float('inf')) self._pareto_front.update(population) - print(111) + self._pop = population raise KeyboardInterrupt From 73cec4e91b4d6ced8a987170349988d0cc0adf88 Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 12:05:44 -0500 Subject: [PATCH 30/44] add rerun to test_fit_5 --- tests/tpot_tests.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index c2bc18cd..d8f878e8 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -970,13 +970,13 @@ def test_fit_4(): def test_fit_5(): - """Assert that the TPOT fit function provides an optimized pipeline with max_time_mins of 2 second.""" + """Assert that the TPOT fit function provides an optimized pipeline with max_time_mins of 2 second with warm_start=True.""" tpot_obj = TPOTClassifier( random_state=42, population_size=2, generations=None, verbosity=0, - max_time_mins=2/60., + max_time_mins=3/60., config_dict='TPOT light', warm_start=True ) @@ -990,6 +990,10 @@ def test_fit_5(): assert tpot_obj._pop != [] assert isinstance(tpot_obj._optimized_pipeline, creator.Individual) assert not (tpot_obj._start_datetime is None) + # rerun it + tpot_obj.fit(training_features, training_target) + assert tpot_obj._pop != [] + def test_fit_6(): From cc42dfa0e2808a96e9022fac7da980fad13d124a Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 12:14:21 -0500 Subject: [PATCH 31/44] stacking_estimator should not stack nan/infinity prediction proba #893 --- tpot/builtins/stacking_estimator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tpot/builtins/stacking_estimator.py b/tpot/builtins/stacking_estimator.py index ee2abfb5..e742e5a5 100644 --- a/tpot/builtins/stacking_estimator.py +++ b/tpot/builtins/stacking_estimator.py @@ -84,7 +84,10 @@ def transform(self, X): X_transformed = np.copy(X) # add class probabilities as a synthetic feature if issubclass(self.estimator.__class__, ClassifierMixin) and hasattr(self.estimator, 'predict_proba'): - X_transformed = np.hstack((self.estimator.predict_proba(X), X)) + y_pred_proba = self.estimator.predict_proba(X) + # check all values that should be not infinity or not NAN + if np.all(np.isfinite(y_pred_proba)): + X_transformed = np.hstack((y_pred_proba, X)) # add class prodiction as a synthetic feature X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed)) From 
a0c0c40afaa1d0cf927e8eb62c90eddfb4a5059a Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 12:40:23 -0500 Subject: [PATCH 32/44] TPOT rasie ValueError when template parameter is invalid. #898 --- tests/tpot_tests.py | 11 +++++++++++ tpot/base.py | 32 +++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index d8f878e8..2f8915dc 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -711,6 +711,17 @@ def test_template_4(): assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin) +def test_template_5(): + """Assert that TPOT rasie ValueError when template parameter is invalid.""" + + tpot_obj = TPOTClassifier( + random_state=42, + verbosity=0, + template='SelectPercentile-Transformer-Classifie' # a typ in Classifier + ) + assert_raises(ValueError, tpot_obj._fit_init) + + def test_fit_GroupKFold(): """Assert that TPOT properly handles the group parameter when using GroupKFold.""" # This check tests if the darker digits images would generalize to the lighter ones. diff --git a/tpot/base.py b/tpot/base.py index 9b003ce7..819087d5 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -460,22 +460,28 @@ def _add_operators(self): ret_types.append(step_ret_type) else: step_ret_type = Output_Array + check_template = True if step == 'CombineDFs': self._pset.addPrimitive(CombineDFs(), [step_in_type, step_in_type], step_in_type) elif main_type.count(step): # if the step is a main type - for operator in self.operators: + ops = [op for op in self.operators if op.type() == step] + for operator in ops: arg_types = operator.parameter_types()[0][1:] - if operator.type() == step: - p_types = ([step_in_type] + arg_types, step_ret_type) - self._pset.addPrimitive(operator, *p_types) - self._import_hash_and_add_terminals(operator, arg_types) - else: # is the step is a specific operator - for operator in self.operators: - arg_types = operator.parameter_types()[0][1:] - if operator.__name__ == step: - p_types = ([step_in_type] + arg_types, step_ret_type) - self._pset.addPrimitive(operator, *p_types) - self._import_hash_and_add_terminals(operator, arg_types) + p_types = ([step_in_type] + arg_types, step_ret_type) + self._pset.addPrimitive(operator, *p_types) + self._import_hash_and_add_terminals(operator, arg_types) + else: # is the step is a specific operator or a wrong input + try: + operator = next(op for op in self.operators if op.__name__ == step) + except: + raise ValueError( + 'An error occured while attempting to read the specified ' + 'template. 
Please check a step named {}'.format(step) + ) + arg_types = operator.parameter_types()[0][1:] + p_types = ([step_in_type] + arg_types, step_ret_type) + self._pset.addPrimitive(operator, *p_types) + self._import_hash_and_add_terminals(operator, arg_types) self.ret_types = [np.ndarray, Output_Array] + ret_types @@ -1382,7 +1388,7 @@ def _evaluate_individuals(self, population, features, target, sample_weight=None ind.fitness.values = (5000.,-float('inf')) self._pareto_front.update(population) - + self._pop = population raise KeyboardInterrupt From a41a4941bdae82817e1f9388385c1067493535d2 Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 13:30:02 -0500 Subject: [PATCH 33/44] remove support for py2.7 and py version < 3.5 --- .appveyor.yml | 2 +- README.md | 3 +-- docs_sources/installing.md | 6 ------ requirements.txt | 18 +++++++++--------- setup.py | 22 +++++++++------------- tpot/_version.py | 2 +- tpot/base.py | 22 ---------------------- 7 files changed, 21 insertions(+), 54 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 8f3ddebb..dfa050c8 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -18,7 +18,7 @@ install: - conda config --set always_yes yes --set changeps1 no - conda update -q conda - conda info -a - - conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas pywin32 joblib + - conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn nose cython pandas joblib - activate test-environment - pip install deap tqdm update_checker stopit xgboost dask[delayed] dask[dataframe] cloudpickle==0.5.6 fsspec>=0.3.3 dask_ml==%DASK_ML_VERSION% diff --git a/README.md b/README.md index 949af676..02c6bc0b 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,7 @@ Development status: [![Development Build Status - Mac/Linux](https://travis-ci.o [![Development Build Status - Windows](https://ci.appveyor.com/api/projects/status/b7bmpwpkjhifrm7v/branch/development?svg=true)](https://ci.appveyor.com/project/weixuanfu/tpot?branch=development) [![Development Coverage Status](https://coveralls.io/repos/github/EpistasisLab/tpot/badge.svg?branch=development)](https://coveralls.io/github/EpistasisLab/tpot?branch=development) -Package information: [![Python 2.7](https://img.shields.io/badge/python-2.7-blue.svg)](https://www.python.org/download/releases/2.7/) -[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) +Package information: [![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/) [![License: LGPL v3](https://img.shields.io/badge/license-LGPL%20v3-blue.svg)](http://www.gnu.org/licenses/lgpl-3.0) [![PyPI version](https://badge.fury.io/py/TPOT.svg)](https://badge.fury.io/py/TPOT) diff --git a/docs_sources/installing.md b/docs_sources/installing.md index 016ce433..e1014e2f 100644 --- a/docs_sources/installing.md +++ b/docs_sources/installing.md @@ -32,12 +32,6 @@ DEAP, update_checker, tqdm and stopit can be installed with `pip` via the comman pip install deap update_checker tqdm stopit ``` -**For the Windows users**, the pywin32 module is required if Python is NOT installed via the [Anaconda Python distribution](https://www.continuum.io/downloads) and can be installed with `pip` for Python version <=3.3 or `conda` (e.g. 
miniconda) for any Python version: - -```Shell -conda install pywin32 -``` - **Optionally**, you can install [XGBoost](https://github.com/dmlc/xgboost) if you would like TPOT to use the eXtreme Gradient Boosting models. XGBoost is entirely optional, and TPOT will still function normally without XGBoost if you do not have it installed. **Windows users: pip installation may not work on some Windows environments, and it may cause unexpected errors.** ```Shell diff --git a/requirements.txt b/requirements.txt index 616ce627..365b8e50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -deap==1.0.2.post2 +deap>=1.2 nose==1.3.7 -numpy==1.12.1 -scikit-learn==0.18.1 -scipy==0.19.0 -tqdm==4.26.0 -update-checker==0.16 -stopit==1.1.1 -pandas==0.20.2 -joblib==0.10.3 +numpy>=1.16.3 +scikit-learn>=0.21.3 +scipy>=1.3.1 +tqdm>=4.36.1 +update-checker>=0.16 +stopit>=1.1.1 +pandas>=0.24.2 +joblib>=0.13.2 diff --git a/setup.py b/setup.py index a6c0980e..ff075ea4 100644 --- a/setup.py +++ b/setup.py @@ -35,30 +35,26 @@ def calculate_version(): This project is hosted at https://github.com/EpistasisLab/tpot ''', zip_safe=True, - install_requires=['numpy>=1.12.1', - 'scipy>=0.19.0', - 'scikit-learn>=0.18.1', - 'deap>=1.0', + install_requires=['numpy>=1.16.3', + 'scipy>=1.3.1', + 'scikit-learn>=0.21.3', + 'deap>=1.2', 'update_checker>=0.16', - 'tqdm>=4.26.0', + 'tqdm>=4.36.1', 'stopit>=1.1.1', - 'pandas>=0.20.2', - 'joblib>=0.10.3'], + 'pandas>=0.24.2', + 'joblib>=0.13.2'], extras_require={ - 'xgboost': ['xgboost==0.6a2'], + 'xgboost': ['xgboost==0.90'], 'skrebate': ['skrebate>=0.3.4'], 'mdr': ['scikit-mdr>=0.4.4'], 'dask': ['dask>=0.18.2', 'distributed>=1.22.1', - 'dask-ml>=0.9.0'], + 'dask-ml>=1.0.0'], }, classifiers=[ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/tpot/_version.py b/tpot/_version.py index 8b320b7d..c56b024a 100644 --- a/tpot/_version.py +++ b/tpot/_version.py @@ -23,4 +23,4 @@ """ -__version__ = '0.10.2' +__version__ = '0.11' diff --git a/tpot/base.py b/tpot/base.py index 819087d5..3627309a 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -81,28 +81,6 @@ warnings.simplefilter('ignore') from tqdm.autonotebook import tqdm -# hot patch for Windows: solve the problem of crashing python after Ctrl + C in Windows OS -# https://github.com/ContinuumIO/anaconda-issues/issues/905 -if sys.platform.startswith('win'): - import win32api - - try: - import _thread - except ImportError: - import thread as _thread - - - def handler(dwCtrlType, hook_sigint=_thread.interrupt_main): - """SIGINT handler function.""" - if dwCtrlType == 0: # CTRL_C_EVENT - hook_sigint() - return 1 # don't chain to the next handler - return 0 - - - win32api.SetConsoleCtrlHandler(handler, 1) - - class TPOTBase(BaseEstimator): """Automatically creates and optimizes machine learning pipelines using GP.""" From a48297160dd053752d307f8cf340c4e702a0f5af Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 13:36:10 -0500 Subject: [PATCH 34/44] remove supports for scikit-learn < 0.21 --- tests/export_tests.py | 7 ++----- tpot/base.py | 7 ++----- tpot/export_utils.py | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git 
a/tests/export_tests.py b/tests/export_tests.py index bb39b90f..0ffbbc27 100644 --- a/tests/export_tests.py +++ b/tests/export_tests.py @@ -645,10 +645,7 @@ def test_imputer_in_export(): import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier -try: - from sklearn.impute import SimpleImputer as Imputer -except ImportError: - from sklearn.preprocessing import Imputer +from sklearn.impute import SimpleImputer # NOTE: Make sure that the outcome column is labeled 'target' in the data file tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64) @@ -656,7 +653,7 @@ def test_imputer_in_export(): training_features, testing_features, training_target, testing_target = \\ train_test_split(features, tpot_data['target'], random_state=None) -imputer = Imputer(strategy="median") +imputer = SimpleImputer(strategy="median") imputer.fit(training_features) training_features = imputer.transform(training_features) testing_features = imputer.transform(testing_features) diff --git a/tpot/base.py b/tpot/base.py index 3627309a..6f256ff2 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -50,10 +50,7 @@ from sklearn.utils import check_X_y, check_consistent_length, check_array from sklearn.pipeline import make_pipeline, make_union from sklearn.preprocessing import FunctionTransformer -try: - from sklearn.impute import SimpleImputer as Imputer -except ImportError: - from sklearn.preprocessing import Imputer +from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split from sklearn.metrics.scorer import make_scorer, _BaseScorer @@ -1125,7 +1122,7 @@ def _impute_values(self, features): print('Imputing missing values in feature set') if self._fitted_imputer is None: - self._fitted_imputer = Imputer(strategy="median") + self._fitted_imputer = SimpleImputer(strategy="median") self._fitted_imputer.fit(features) return self._fitted_imputer.transform(features) diff --git a/tpot/export_utils.py b/tpot/export_utils.py index 51a37305..a66e3da2 100644 --- a/tpot/export_utils.py +++ b/tpot/export_utils.py @@ -106,7 +106,7 @@ def export_pipeline(exported_pipeline, # Add the imputation step if it was used by TPOT if impute: pipeline_text += """ -imputer = Imputer(strategy="median") +imputer = SimpleImputer(strategy="median") imputer.fit(training_features) training_features = imputer.transform(training_features) testing_features = imputer.transform(testing_features) @@ -217,10 +217,7 @@ def merge_imports(old_dict, new_dict): # Add the imputer if necessary if impute: - pipeline_text += """try: - from sklearn.impute import SimpleImputer as Imputer -except ImportError: - from sklearn.preprocessing import Imputer + pipeline_text += """from sklearn.impute import SimpleImputer """ if random_state is not None: pipeline_text += """from tpot.export_utils import set_param_recursive From cc475f41aff2f930ccc840b816cf139b3e759531 Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 13:44:55 -0500 Subject: [PATCH 35/44] change sklearn requirement to 0.21.0 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 365b8e50..9fa9dca7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ deap>=1.2 nose==1.3.7 numpy>=1.16.3 -scikit-learn>=0.21.3 +scikit-learn>=0.21.0 scipy>=1.3.1 tqdm>=4.36.1 update-checker>=0.16 diff --git a/setup.py b/setup.py index ff075ea4..0c98684a 100644 --- a/setup.py +++ b/setup.py @@ 
-37,7 +37,7 @@ def calculate_version(): zip_safe=True, install_requires=['numpy>=1.16.3', 'scipy>=1.3.1', - 'scikit-learn>=0.21.3', + 'scikit-learn>=0.21.0', 'deap>=1.2', 'update_checker>=0.16', 'tqdm>=4.36.1', From 083adb25ea45204bac771a5eb9dfa7b4accc54cd Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 13:46:10 -0500 Subject: [PATCH 36/44] change version id to 0.11.0 --- tpot/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpot/_version.py b/tpot/_version.py index c56b024a..69737bab 100644 --- a/tpot/_version.py +++ b/tpot/_version.py @@ -23,4 +23,4 @@ """ -__version__ = '0.11' +__version__ = '0.11.0' From 318641facaae7a687cec6ed14b362b4038e07302 Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 14:44:15 -0500 Subject: [PATCH 37/44] drop support for scoring functions with the signature score_func(y_true, y_pred) --- docs_sources/api.md | 4 --- docs_sources/releases.md | 56 ++++++++++++++++++++++++++++++++++++++++ docs_sources/using.md | 3 --- tests/tpot_tests.py | 42 +++++++++++++----------------- tpot/base.py | 30 +++++----------------- 5 files changed, 81 insertions(+), 54 deletions(-) diff --git a/docs_sources/api.md b/docs_sources/api.md index 473509ac..e31c2883 100644 --- a/docs_sources/api.md +++ b/docs_sources/api.md @@ -80,8 +80,6 @@ Function used to evaluate the quality of a given pipeline for the classification
If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y).
-If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred). TPOT assumes that any function with "error" or "loss" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. -
See the section on scoring functions for more details. @@ -573,8 +571,6 @@ Note that we recommend using the neg version of mean squared error and
If you would like to use a custom scorer, you can pass the callable object/function with signature scorer(estimator, X, y).
-If you would like to use a metric function, you can pass the callable function to this parameter with the signature score_func(y_true, y_pred). TPOT assumes that any function with "error" or "loss" in the function name is meant to be minimized, whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. -
See the section on scoring functions for more details. diff --git a/docs_sources/releases.md b/docs_sources/releases.md index 52c600eb..6aa77a11 100644 --- a/docs_sources/releases.md +++ b/docs_sources/releases.md @@ -1,3 +1,59 @@ +# Version 0.11.0 +- **Support for Python 3.4 and below has been officially dropped.** Support for scikit-learn 0.20 or below has also been dropped. +- Support for a metric function with the signature `score_func(y_true, y_pred)` for the `scoring` parameter has been dropped. +- Refine `StackingEstimator` so that it does not stack NaN/Infinity prediction probabilities. +- Fix a bug where the population did not persist with `warm_start=True` when `max_time_mins` was not the default value. +- The `random_state` parameter in TPOT is now used for pipeline evaluation instead of the fixed random seed of 42 used previously. The `set_param_recursive` function has been moved to export_utils.py and can be used in exported code to set `random_state` recursively in a scikit-learn Pipeline. It is used to set `random_state` in the `fitted_pipeline_` attribute and in exported pipelines. +- TPOT can now use `generations` and `max_time_mins` independently to limit the optimization process, via either parameter or both. +- The `.export()` function now returns the exported pipeline as a string if no output file name is specified. +- Add [`SGDClassifier`](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html) and [`SGDRegressor`](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html) to the TPOT default configurations. +- Fix errors in documentation + +# Version 0.10.2 +- **TPOT v0.10.2 is the last version to support Python 2.7 and Python 3.4.** +- Minor updates to fix compatibility issues with the latest version of scikit-learn (version > 0.21) and xgboost (v0.90) +- The default value of the `template` parameter is changed to `None`. +- Fix errors in documentation + +# Version 0.10.1 + +- Add a `data_file_path` option to the `export` function for replacing `'PATH/TO/DATA/FILE'` with a customized dataset path in exported scripts. (Related issue #838) +- Change the Python version in CI tests to 3.7 +- Add CI tests for macOS. + +# Version 0.10.0 + +- Add a new `template` option to specify a desired structure for the machine learning pipeline in TPOT. Check the [TPOT API](https://epistasislab.github.io/tpot/api/) (it will be updated once it is merged into the master branch). +- Add the `FeatureSetSelector` operator to TPOT for feature selection based on *a priori* expert knowledge. Please check our [preprint paper](https://www.biorxiv.org/content/10.1101/502484v1.article-info) for more details (*Note: it was named `DatasetSelector` in the first version of the paper but will be renamed to `FeatureSetSelector` in the next version*) +- Refine the `n_jobs` parameter to accept values below -1. For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all CPUs but one are used. +- The `memory` parameter can now create the memory cache directory if it does not exist. +- Fix minor bugs. + +# Version 0.9.6 + +- Fix a bug that caused the `max_time_mins` parameter not to work when `use_dask=True` in TPOT 0.9.5 +- TPOT now saves the best Pareto values and the best Pareto pipelines in the checkpoint folder +- TPOT raises `ImportError` if operators in the TPOT configuration are not available when `verbosity>2` +- Thanks to @PGijsbers for the suggestions. TPOT can now save the scores of individuals already evaluated in any generation, even if the evaluation process of that generation is interrupted/stopped.
Note that, in this case, TPOT will raise this **warning message**: `WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.`, because the pipelines in an early generation (e.g. the 1st generation) have been evolved/modified only a limited number of times by the evolutionary algorithm. +- Fix bugs in the configuration of `TPOTRegressor` +- Error fixes in documentation + +# Version 0.9.5 + +- **TPOT now supports integration with Dask for parallelization + smart caching**. Big thanks to the Dask dev team for making this happen! + +- TPOT now supports imputation/sparse matrices in the `predict` and `predict_proba` functions. + +- `TPOTClassifier` and `TPOTRegressor` now follow the scikit-learn estimator API. + +- We refined the scoring parameter in the TPOT API to accept a [`Scorer` object](http://jaquesgrobler.github.io/online-sklearn-build/modules/generated/sklearn.metrics.Scorer.html). + +- We refined parameters in VarianceThreshold and FeatureAgglomeration. + +- TPOT now supports using memory caching within a Pipeline via an optional `memory` parameter. + +- We improved the documentation of TPOT. + # Version 0.9 * **TPOT now supports sparse matrices** with a new built-in TPOT configuration, "TPOT sparse". We are using a custom OneHotEncoder implementation that supports missing values and continuous features. diff --git a/docs_sources/using.md b/docs_sources/using.md index 82ed0efc..f83a2652 100644 --- a/docs_sources/using.md +++ b/docs_sources/using.md @@ -387,9 +387,6 @@ print(tpot.score(X_test, y_test)) tpot.export('tpot_digits_pipeline.py') ``` -- You can pass a metric function with the signature `score_func(y_true, y_pred)` (e.g. `my_custom_accuracy` in the example above), where `y_true` are the true target values and `y_pred` are the predicted target values from an estimator. To do this, you should implement your own function. See the example above for further explanation. TPOT assumes that any function with "error" or "loss" in the function name is meant to be minimized (`greater_is_better=False` in [`make_scorer`](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html)), whereas any other functions will be maximized. This scoring type was deprecated in version 0.9.1 and will be removed in version 0.11. - - * **my_module.scorer_name**: You can also use a custom `score_func(y_true, y_pred)` or `scorer(estimator, X, y)` function through the command line by adding the argument `-scoring my_module.scorer` to your command-line call. TPOT will import your module and use the custom scoring function from there. TPOT will include your current working directory when importing the module, so you can place it in the same directory where you are going to run TPOT. Example: `-scoring sklearn.metrics.auc` will use the function auc from sklearn.metrics module.
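The following sketch illustrates the scoring styles that remain supported after this change: a callable with the scorer(estimator, X, y) signature, or a metric wrapped with scikit-learn's make_scorer. It is an illustration only, not code from the patch; it assumes TPOT 0.11+ and scikit-learn are installed, and the scorer name is hypothetical.

```python
# Illustrative sketch only (assumes TPOT 0.11+ and scikit-learn are installed).
from sklearn.metrics import balanced_accuracy_score, make_scorer
from tpot import TPOTClassifier


def my_balanced_accuracy_scorer(estimator, X, y):
    """Hypothetical custom scorer using the scorer(estimator, X, y) signature."""
    return balanced_accuracy_score(y, estimator.predict(X))


# Either form below should be accepted by the scoring parameter:
tpot = TPOTClassifier(scoring=my_balanced_accuracy_scorer)
# tpot = TPOTClassifier(scoring=make_scorer(balanced_accuracy_score))

# Passing a bare metric such as balanced_accuracy_score directly is expected
# to raise a ValueError once fitting starts, per the change described above.
```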
diff --git a/tests/tpot_tests.py b/tests/tpot_tests.py index 2f8915dc..e9be5b5e 100644 --- a/tests/tpot_tests.py +++ b/tests/tpot_tests.py @@ -175,14 +175,9 @@ def test_init_default_scoring(): def test_init_default_scoring_2(): - """Assert that TPOT intitializes with a valid customized metric function.""" - with warnings.catch_warnings(record=True) as w: - tpot_obj = TPOTClassifier(scoring=balanced_accuracy) - tpot_obj._fit_init() - assert len(w) == 1 # deap 1.2.2 warning message made this unit test failed - assert issubclass(w[-1].category, DeprecationWarning) # deap 1.2.2 warning message made this unit test failed - assert "This scoring type was deprecated" in str(w[-1].message) # deap 1.2.2 warning message made this unit test failed - assert tpot_obj.scoring_function._score_func == balanced_accuracy + """Assert that TPOT rasies ValueError with a invalid sklearn metric function.""" + tpot_obj = TPOTClassifier(scoring=balanced_accuracy) + assert_raises(ValueError, tpot_obj._fit_init) def test_init_default_scoring_3(): @@ -207,28 +202,27 @@ def my_scorer(clf, X, y): def test_init_default_scoring_5(): - """Assert that TPOT intitializes with a valid sklearn metric function roc_auc_score.""" - with warnings.catch_warnings(record=True) as w: - tpot_obj = TPOTClassifier(scoring=roc_auc_score) - tpot_obj._fit_init() - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "This scoring type was deprecated" in str(w[-1].message) - assert tpot_obj.scoring_function._score_func == roc_auc_score + """Assert that TPOT rasies ValueError with a invalid sklearn metric function roc_auc_score.""" + tpot_obj = TPOTClassifier(scoring=roc_auc_score) + assert_raises(ValueError, tpot_obj._fit_init) def test_init_default_scoring_6(): - """Assert that TPOT intitializes with a valid customized metric function in __main__""" + """Assert that TPOT rasies ValueError with a invalid sklearn metric function from __main__.""" def my_scorer(y_true, y_pred): return roc_auc_score(y_true, y_pred) - with warnings.catch_warnings(record=True) as w: - tpot_obj = TPOTClassifier(scoring=my_scorer) - tpot_obj._fit_init() - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "This scoring type was deprecated" in str(w[-1].message) - assert tpot_obj.scoring_function._score_func == my_scorer + tpot_obj = TPOTClassifier(scoring=my_scorer) + assert_raises(ValueError, tpot_obj._fit_init) + + +def test_init_default_scoring_7(): + """Assert that TPOT rasies ValueError with a valid sklearn metric function from __main__.""" + def my_scorer(estimator, X, y): + return make_scorer(balanced_accuracy) + + tpot_obj = TPOTClassifier(scoring=my_scorer) + tpot_obj._fit_init() def test_invalid_score_warning(): diff --git a/tpot/base.py b/tpot/base.py index 6f256ff2..b2f6b1f0 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -52,7 +52,7 @@ from sklearn.preprocessing import FunctionTransformer from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split -from sklearn.metrics.scorer import make_scorer, _BaseScorer +from sklearn.metrics.scorer import _BaseScorer from joblib import Parallel, delayed, Memory @@ -137,13 +137,6 @@ def __init__(self, generations=100, population_size=100, offspring_size=None, ['neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'r2'] - - If you would like to use a custom scoring function, you can pass a callable - function to this parameter with the signature scorer(y_true, y_pred). 
- See the section on scoring functions in the documentation for more details. - - TPOT assumes that any custom scoring function with "error" or "loss" in the - name is meant to be minimized, whereas any other functions will be maximized. cv: int or cross-validation generator, optional (default: 5) If CV is a number, then it is the number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization @@ -308,25 +301,16 @@ def _setup_scoring_function(self, scoring): elif callable(scoring): # Heuristic to ensure user has not passed a metric module = getattr(scoring, '__module__', None) - if sys.version_info[0] < 3: - if inspect.isfunction(scoring): - args_list = inspect.getargspec(scoring)[0] - else: - args_list = inspect.getargspec(scoring.__call__)[0] - else: - args_list = inspect.getfullargspec(scoring)[0] + args_list = inspect.getfullargspec(scoring)[0] if args_list == ["y_true", "y_pred"] or (hasattr(module, 'startswith') and \ (module.startswith('sklearn.metrics.') or module.startswith('tpot.metrics')) and \ not module.startswith('sklearn.metrics.scorer') and \ not module.startswith('sklearn.metrics.tests.')): - scoring_name = scoring.__name__ - greater_is_better = 'loss' not in scoring_name and 'error' not in scoring_name - self.scoring_function = make_scorer(scoring, greater_is_better=greater_is_better) - warnings.simplefilter('always', DeprecationWarning) - warnings.warn('Scoring function {} looks like it is a metric function ' - 'rather than a scikit-learn scorer. This scoring type was deprecated ' - 'in version TPOT 0.9.1 and will be removed in version 0.11. ' - 'Please update your custom scoring function.'.format(scoring), DeprecationWarning) + raise ValueError( + 'Scoring function {} looks like it is a metric function ' + 'rather than a scikit-learn scorer. This scoring type was removed in version 0.11. 
' + 'Please update your custom scoring function.'.format(scoring) + ) else: self.scoring_function = scoring From b119d82e1147f4469be4e26a4f68130f5ee487ec Mon Sep 17 00:00:00 2001 From: weixuanfuDate: Tue, 5 Nov 2019 14:49:36 -0500 Subject: [PATCH 38/44] fix a bug in eaMuPlusLambda #946 --- tpot/gp_deap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpot/gp_deap.py b/tpot/gp_deap.py index 71566ce0..a68c95e1 100644 --- a/tpot/gp_deap.py +++ b/tpot/gp_deap.py @@ -224,7 +224,7 @@ def eaMuPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, ngen, pbar, for ind in population: initialize_stats_dict(ind) - population = toolbox.evaluate(population) + population[:] = toolbox.evaluate(population) record = stats.compile(population) if stats is not None else {} logbook.record(gen=0, nevals=len(population), **record) From d0e87cc1e1ed084b3181b0d06871a118c6fdcced Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 15:05:00 -0500 Subject: [PATCH 39/44] refine self._pop #946 --- tpot/base.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tpot/base.py b/tpot/base.py index b2f6b1f0..d32d19cb 100644 --- a/tpot/base.py +++ b/tpot/base.py @@ -656,10 +656,8 @@ def fit(self, features, target, sample_weight=None, groups=None): self._toolbox.register('evaluate', self._evaluate_individuals, features=features, target=target, sample_weight=sample_weight, groups=groups) # assign population, self._pop can only be not None if warm_start is enabled - if self._pop: - pop = self._pop - else: - pop = self._toolbox.population(n=self.population_size) + if not self._pop: + self._pop = self._toolbox.population(n=self.population_size) def pareto_eq(ind1, ind2): """Determine whether two individuals are equal on the Pareto front. 
@@ -704,7 +702,7 @@ def pareto_eq(ind1, ind2): self._setup_memory() warnings.simplefilter('ignore') pop, _ = eaMuPlusLambda( - population=pop, + population=self._pop, toolbox=self._toolbox, mu=self.population_size, lambda_=self._lambda, From 96d6a61dc0123f98e85512aed714ffa2d50db993 Mon Sep 17 00:00:00 2001 From: weixuanfu Date: Tue, 5 Nov 2019 15:19:14 -0500 Subject: [PATCH 40/44] refine documentation --- docs/404.html | 24 +- docs/api/index.html | 38 +- docs/citing/index.html | 18 +- docs/contributing/index.html | 18 +- docs/css/highlight.css | 124 + docs/css/theme_extra.css | 5 +- docs/examples/index.html | 106 +- docs/index.html | 28 +- docs/installing/index.html | 26 +- docs/js/highlight.pack.js | 2 + docs/js/theme.js | 36 +- docs/related/index.html | 18 +- docs/releases/index.html | 114 +- docs/search.html | 44 +- docs/search/lunr.js | 2986 ------------------ docs/search/lunr.min.js | 7 + docs/search/main.js | 96 - docs/search/mustache.min.js | 1 + docs/search/require.js | 36 + docs/search/search-results-template.mustache | 4 + docs/search/search.js | 92 + docs/search/search_index.json | 250 +- docs/search/text.js | 390 +++ docs/search/worker.js | 128 - docs/sitemap.xml | 51 +- docs/sitemap.xml.gz | Bin 272 -> 0 bytes docs/support/index.html | 18 +- docs/using/index.html | 43 +- docs_sources/releases.md | 6 +- 29 files changed, 1259 insertions(+), 3450 deletions(-) create mode 100644 docs/css/highlight.css create mode 100644 docs/js/highlight.pack.js delete mode 100644 docs/search/lunr.js create mode 100644 docs/search/lunr.min.js delete mode 100644 docs/search/main.js create mode 100644 docs/search/mustache.min.js create mode 100644 docs/search/require.js create mode 100644 docs/search/search-results-template.mustache create mode 100644 docs/search/search.js create mode 100644 docs/search/text.js delete mode 100644 docs/search/worker.js delete mode 100644 docs/sitemap.xml.gz diff --git a/docs/404.html b/docs/404.html index c277b485..009af1cd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -13,12 +13,11 @@ - + - - - - + + + @@ -29,10 +28,10 @@