From c9c783ca0d97d9bf911893a34bd336d3ddf01b49 Mon Sep 17 00:00:00 2001 From: Miles Cranmer Date: Sat, 11 Jun 2022 19:42:22 +0000 Subject: [PATCH 001/136] initial commit --- gui/.gitattributes | 27 +++++++++++++++++++++++++++ gui/README.md | 12 ++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 gui/.gitattributes create mode 100644 gui/README.md diff --git a/gui/.gitattributes b/gui/.gitattributes new file mode 100644 index 000000000..ac481c8eb --- /dev/null +++ b/gui/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/gui/README.md b/gui/README.md new file mode 100644 index 000000000..540da82d2 --- /dev/null +++ b/gui/README.md @@ -0,0 +1,12 @@ +--- +title: PySR +emoji: 🌍 +colorFrom: green +colorTo: indigo +sdk: streamlit +sdk_version: 1.10.0 +app_file: app.py +pinned: false +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference From 073279066bd30a6cf709a7fbf3e918ebd70e2697 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 11 Jun 2022 15:47:25 -0400 Subject: [PATCH 002/136] Add PySR as pip requirement --- gui/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 gui/requirements.txt diff --git a/gui/requirements.txt b/gui/requirements.txt new file mode 100644 index 000000000..8f2600af5 --- /dev/null +++ b/gui/requirements.txt @@ -0,0 +1,2 @@ +pysr==0.9.1 +numpy==1.22.2 \ No newline at end of file From 57203050e98a8eddbd2536d0f262734a31445204 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 11 Jun 2022 15:49:30 -0400 Subject: [PATCH 003/136] Add Julia package file --- gui/packages.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 gui/packages.txt diff --git a/gui/packages.txt b/gui/packages.txt new file mode 100644 index 000000000..78dd3ada4 --- /dev/null +++ b/gui/packages.txt @@ -0,0 +1 @@ +julia \ No newline at end of file From 9162188df443f2dad4e2976dc1fa71184688b428 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 11 Jun 2022 15:56:06 -0400 Subject: [PATCH 004/136] Add pandas requirement --- gui/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gui/requirements.txt b/gui/requirements.txt index 8f2600af5..cef9e0133 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,2 +1,3 @@ pysr==0.9.1 -numpy==1.22.2 \ No newline at end of file +numpy +pandas \ No newline at end of file From c6a43c4c3d3b44de1913e50e013ea5f5d85091f4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 11 Jun 2022 15:56:16 -0400 Subject: [PATCH 005/136] Add app to fit PySR model --- gui/app.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 gui/app.py diff --git a/gui/app.py b/gui/app.py new file mode 100644 index 000000000..8960a7e06 --- /dev/null +++ b/gui/app.py @@ -0,0 +1,29 @@ +import streamlit as st +import numpy as np +import pandas as pd + +st.title("Interactive PySR") +file_name = st.file_uploader( + "Upload a data file, with your output column labeled 'y'", type=["csv"] +) + +if file_name is not None: + col1, col2 = st.columns(2) + + df = pd.read_csv(file_name) + y = np.array(df["y"]) + X = df.drop(["y"], axis=1) + import pysr + + pysr.install() + from pysr import PySRRegressor + + model = PySRRegressor() + model.fit(X, y) + + col1.header("Equation") + col2.header("Loss") + # model.equations_ is a pd.DataFrame + for i, row in model.equations_.iterrows(): + col1.subheader(row["equation"]) + col2.subheader(row["loss"]) From edbcfa6f1ac638784c5101bb45959fada90b8ce8 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 11 Jun 2022 17:19:25 -0400 Subject: [PATCH 006/136] Install PyCall.jl with other python instance --- gui/app.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/gui/app.py b/gui/app.py index 8960a7e06..553ced626 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,4 +1,16 @@ import streamlit as st +import os + +# Need to install PySR in separate python instance: +os.system( + """ +if [ ! -d "$HOME/.julia/environments/pysr-0.9.1" ]; then + python -c 'import pysr; pysr.install()' +fi +""" +) +import pysr +from pysr import PySRRegressor import numpy as np import pandas as pd @@ -13,17 +25,18 @@ df = pd.read_csv(file_name) y = np.array(df["y"]) X = df.drop(["y"], axis=1) - import pysr - - pysr.install() - from pysr import PySRRegressor - model = PySRRegressor() + model = PySRRegressor(update=False) model.fit(X, y) col1.header("Equation") col2.header("Loss") - # model.equations_ is a pd.DataFrame for i, row in model.equations_.iterrows(): - col1.subheader(row["equation"]) - col2.subheader(row["loss"]) + col1.subheader(str(row["equation"])) + col2.subheader(str(row["loss"])) + + model = None + +Main = None +pysr.sr.Main = None +pysr.sr.already_ran = False From 34cffe9fba9c009d7dafcee34ff694810a9a4d18 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 11:27:26 -0400 Subject: [PATCH 007/136] Add file to generate example data --- gui/gen_example_data.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 gui/gen_example_data.py diff --git a/gui/gen_example_data.py b/gui/gen_example_data.py new file mode 100644 index 000000000..4eef2be26 --- /dev/null +++ b/gui/gen_example_data.py @@ -0,0 +1,17 @@ +import pandas as pd +import numpy as np + +rand_between = lambda a, b, size: np.random.rand(*size) * (b - a) + a + +X = pd.DataFrame( + { + "T": rand_between(273, 373, (100,)), # Kelvin + "P": rand_between(100, 200, (100,)) * 1e3, # Pa + "n": rand_between(0, 10, (100,)), # mole + } +) + +R = 8.3144598 # J/mol/K +X["y"] = X["n"] * R * X["T"] / X["P"] + +X.to_csv("data.csv", index=False) \ No newline at end of file From 9cee10c66a963edb365c04887f9145e3f37632eb Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 11:27:34 -0400 Subject: [PATCH 008/136] Add gitignore --- gui/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 gui/.gitignore diff --git a/gui/.gitignore b/gui/.gitignore new file mode 100644 index 000000000..074bb4715 --- /dev/null +++ b/gui/.gitignore @@ -0,0 +1 @@ +*.csv* From 222fbf088615854c7213e1f2c597648d17e678c2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 11:54:47 -0400 Subject: [PATCH 009/136] Init gradio version --- gui/app.py | 68 +++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/gui/app.py b/gui/app.py index 553ced626..dcccc3170 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,42 +1,42 @@ -import streamlit as st +import io +import gradio as gr import os +import tempfile +from typing import List -# Need to install PySR in separate python instance: -os.system( - """ -if [ ! -d "$HOME/.julia/environments/pysr-0.9.1" ]; then - python -c 'import pysr; pysr.install()' -fi -""" -) -import pysr -from pysr import PySRRegressor -import numpy as np -import pandas as pd - -st.title("Interactive PySR") -file_name = st.file_uploader( - "Upload a data file, with your output column labeled 'y'", type=["csv"] -) -if file_name is not None: - col1, col2 = st.columns(2) +def greet(file_obj: List[tempfile._TemporaryFileWrapper]): + # Need to install PySR in separate python instance: + os.system( + """if [ ! -d "$HOME/.julia/environments/pysr-0.9.1" ] + then + python -c 'import pysr; pysr.install()' + fi""" + ) + from pysr import PySRRegressor + import numpy as np + import pandas as pd + + df = pd.read_csv(file_obj[0]) + # y = np.array(df["y"]) + # X = df.drop(["y"], axis=1) - df = pd.read_csv(file_name) - y = np.array(df["y"]) - X = df.drop(["y"], axis=1) + # model = PySRRegressor(update=False, temp_equation_file=True) + # model.fit(X, y) - model = PySRRegressor(update=False) - model.fit(X, y) + # df_output = model.equations_ + df_output = df + df_output.to_csv("output.csv", index=False, sep="\t") - col1.header("Equation") - col2.header("Loss") - for i, row in model.equations_.iterrows(): - col1.subheader(str(row["equation"])) - col2.subheader(str(row["loss"])) + return "output.csv" - model = None -Main = None -pysr.sr.Main = None -pysr.sr.already_ran = False +demo = gr.Interface( + fn=greet, + description="A demo of PySR", + inputs=gr.File(label="Upload a CSV file", file_count=1), + outputs=gr.File(label="Equation List"), +) +# Add file to the demo: + +demo.launch() From f072863ba6b6cf12ba6a0cad1cce3916dd6177ab Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:33:38 -0400 Subject: [PATCH 010/136] Working app? --- gui/app.py | 72 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/gui/app.py b/gui/app.py index dcccc3170..c05c45fda 100644 --- a/gui/app.py +++ b/gui/app.py @@ -2,10 +2,18 @@ import gradio as gr import os import tempfile -from typing import List -def greet(file_obj: List[tempfile._TemporaryFileWrapper]): +def greet( + file_obj: tempfile._TemporaryFileWrapper, + col_to_fit: str, + niterations: int, + binary_operators: list, + unary_operators: list, +): + if col_to_fit == "": + raise ValueError("Please enter a column to predict") + niterations = int(niterations) # Need to install PySR in separate python instance: os.system( """if [ ! -d "$HOME/.julia/environments/pysr-0.9.1" ] @@ -17,26 +25,52 @@ def greet(file_obj: List[tempfile._TemporaryFileWrapper]): import numpy as np import pandas as pd - df = pd.read_csv(file_obj[0]) - # y = np.array(df["y"]) - # X = df.drop(["y"], axis=1) + df = pd.read_csv(file_obj.name) + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) - # model = PySRRegressor(update=False, temp_equation_file=True) - # model.fit(X, y) + model = PySRRegressor( + update=False, + temp_equation_file=True, + niterations=niterations, + binary_operators=binary_operators, + unary_operators=unary_operators, + ) + model.fit(X, y) + + return model.equations_ - # df_output = model.equations_ - df_output = df - df_output.to_csv("output.csv", index=False, sep="\t") - return "output.csv" +def main(): + demo = gr.Interface( + fn=greet, + description="A demo of PySR", + inputs=[ + gr.File(label="Upload a CSV file"), + gr.Textbox(placeholder="Column to predict"), + gr.Slider( + minimum=1, + maximum=1000, + value=40, + label="Number of iterations", + ), + gr.CheckboxGroup( + choices=["+", "-", "*", "/", "^"], + label="Binary Operators", + value=["+", "-", "*", "/"], + ), + gr.CheckboxGroup( + choices=["sin", "cos", "exp", "log"], + label="Unary Operators", + value=[], + ), + ], + outputs="dataframe", + ) + # Add file to the demo: + demo.launch() -demo = gr.Interface( - fn=greet, - description="A demo of PySR", - inputs=gr.File(label="Upload a CSV file", file_count=1), - outputs=gr.File(label="Equation List"), -) -# Add file to the demo: -demo.launch() +if __name__ == "__main__": + main() From 460af25a5228f8ffb889a28b780d7e5bdb453476 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:46:09 -0400 Subject: [PATCH 011/136] Output more useful errors --- gui/app.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/gui/app.py b/gui/app.py index c05c45fda..bb958e1ed 100644 --- a/gui/app.py +++ b/gui/app.py @@ -2,6 +2,8 @@ import gradio as gr import os import tempfile +import numpy as np +import pandas as pd def greet( @@ -11,8 +13,28 @@ def greet( binary_operators: list, unary_operators: list, ): + empty_df = pd.DataFrame( + { + "equation": [], + "loss": [], + "complexity": [], + } + ) if col_to_fit == "": - raise ValueError("Please enter a column to predict") + return ( + empty_df, + "Please enter a column to predict!", + ) + if len(binary_operators) == 0 and len(unary_operators) == 0: + return ( + empty_df, + "Please select at least one operator!", + ) + if file_obj is None: + return ( + empty_df, + "Please upload a CSV file!", + ) niterations = int(niterations) # Need to install PySR in separate python instance: os.system( @@ -22,8 +44,6 @@ def greet( fi""" ) from pysr import PySRRegressor - import numpy as np - import pandas as pd df = pd.read_csv(file_obj.name) y = np.array(df[col_to_fit]) @@ -38,7 +58,10 @@ def greet( ) model.fit(X, y) - return model.equations_ + df = model.equations_[["equation", "loss", "complexity"]] + # Convert all columns to string type: + df = df.astype(str) + return df, "Successful." def main(): @@ -65,7 +88,7 @@ def main(): value=[], ), ], - outputs="dataframe", + outputs=["dataframe", "text"], ) # Add file to the demo: From 0cd6a71e28e348d97cf8d43cf03eda7c3a5b1ba4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:49:23 -0400 Subject: [PATCH 012/136] Better labels --- gui/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gui/app.py b/gui/app.py index bb958e1ed..255a48a2d 100644 --- a/gui/app.py +++ b/gui/app.py @@ -69,8 +69,8 @@ def main(): fn=greet, description="A demo of PySR", inputs=[ - gr.File(label="Upload a CSV file"), - gr.Textbox(placeholder="Column to predict"), + gr.File(label="Upload a CSV File"), + gr.Textbox(label="Column to Predict", placeholder="y"), gr.Slider( minimum=1, maximum=1000, @@ -88,7 +88,7 @@ def main(): value=[], ), ], - outputs=["dataframe", "text"], + outputs=[gr.DataFrame(label="Equations"), gr.Textbox(label="Error Log")], ) # Add file to the demo: From d6fc94cb033bd27337f50c263e5a8bed48446fda Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:55:06 -0400 Subject: [PATCH 013/136] Update backend --- gui/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/README.md b/gui/README.md index 540da82d2..3c93a16c3 100644 --- a/gui/README.md +++ b/gui/README.md @@ -3,8 +3,8 @@ title: PySR emoji: 🌍 colorFrom: green colorTo: indigo -sdk: streamlit -sdk_version: 1.10.0 +sdk: gradio +sdk_version: 3.0.5 app_file: app.py pinned: false --- From 8614da94e7e42698648f505ea572bc6d984f52b4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:57:22 -0400 Subject: [PATCH 014/136] Fix gradio issues --- gui/app.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gui/app.py b/gui/app.py index 255a48a2d..6df9dcb5e 100644 --- a/gui/app.py +++ b/gui/app.py @@ -69,26 +69,29 @@ def main(): fn=greet, description="A demo of PySR", inputs=[ - gr.File(label="Upload a CSV File"), - gr.Textbox(label="Column to Predict", placeholder="y"), - gr.Slider( + gr.inputs.File(label="Upload a CSV File"), + gr.inputs.Textbox(label="Column to Predict", placeholder="y"), + gr.inputs.Slider( minimum=1, maximum=1000, value=40, label="Number of iterations", ), - gr.CheckboxGroup( + gr.inputs.CheckboxGroup( choices=["+", "-", "*", "/", "^"], label="Binary Operators", value=["+", "-", "*", "/"], ), - gr.CheckboxGroup( + gr.inputs.CheckboxGroup( choices=["sin", "cos", "exp", "log"], label="Unary Operators", value=[], ), ], - outputs=[gr.DataFrame(label="Equations"), gr.Textbox(label="Error Log")], + outputs=[ + gr.outputs.DataFrame(label="Equations"), + gr.outputs.Textbox(label="Error Log") + ], ) # Add file to the demo: From 3535667790dd18e7dde26b84d0ee4f3c89e6eafe Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 12:58:02 -0400 Subject: [PATCH 015/136] Update sdk version --- gui/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/README.md b/gui/README.md index 3c93a16c3..affe03b50 100644 --- a/gui/README.md +++ b/gui/README.md @@ -4,7 +4,7 @@ emoji: 🌍 colorFrom: green colorTo: indigo sdk: gradio -sdk_version: 3.0.5 +sdk_version: 3.0.15 app_file: app.py pinned: false --- From 454ec0adb9fb52c67daaaf2ef2fe691caac2c960 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 14:05:43 -0400 Subject: [PATCH 016/136] Fix use of API --- gui/app.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gui/app.py b/gui/app.py index 6df9dcb5e..303eaab93 100644 --- a/gui/app.py +++ b/gui/app.py @@ -74,23 +74,23 @@ def main(): gr.inputs.Slider( minimum=1, maximum=1000, - value=40, + default=40, label="Number of iterations", ), gr.inputs.CheckboxGroup( choices=["+", "-", "*", "/", "^"], label="Binary Operators", - value=["+", "-", "*", "/"], + default=["+", "-", "*", "/"], ), gr.inputs.CheckboxGroup( choices=["sin", "cos", "exp", "log"], label="Unary Operators", - value=[], + default=[], ), ], outputs=[ - gr.outputs.DataFrame(label="Equations"), - gr.outputs.Textbox(label="Error Log") + "dataframe", + gr.outputs.Textbox(label="Error Log"), ], ) # Add file to the demo: From dc554ea174ab051447dcd44e44fd3c4d82ab27c3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 14:13:15 -0400 Subject: [PATCH 017/136] Change title of app --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 303eaab93..23f609b60 100644 --- a/gui/app.py +++ b/gui/app.py @@ -67,7 +67,7 @@ def greet( def main(): demo = gr.Interface( fn=greet, - description="A demo of PySR", + description="PySR Demo", inputs=[ gr.inputs.File(label="Upload a CSV File"), gr.inputs.Textbox(label="Column to Predict", placeholder="y"), From cc248dd25e2c87dd81b6deb7d5dc23ab85915a72 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 14:35:15 -0400 Subject: [PATCH 018/136] Bump pysr version --- gui/app.py | 2 +- gui/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index 23f609b60..c05d78a8a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -38,7 +38,7 @@ def greet( niterations = int(niterations) # Need to install PySR in separate python instance: os.system( - """if [ ! -d "$HOME/.julia/environments/pysr-0.9.1" ] + """if [ ! -d "$HOME/.julia/environments/pysr-0.9.3" ] then python -c 'import pysr; pysr.install()' fi""" diff --git a/gui/requirements.txt b/gui/requirements.txt index cef9e0133..0262f6f9e 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,3 +1,3 @@ -pysr==0.9.1 +pysr==0.9.3 numpy pandas \ No newline at end of file From e69aea3bb865a316297c9b61b7fc864cb349aa5d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 14:40:02 -0400 Subject: [PATCH 019/136] More helpful error messages --- gui/app.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index c05d78a8a..d658367ec 100644 --- a/gui/app.py +++ b/gui/app.py @@ -4,6 +4,7 @@ import tempfile import numpy as np import pandas as pd +import traceback as tb def greet( @@ -56,7 +57,22 @@ def greet( binary_operators=binary_operators, unary_operators=unary_operators, ) - model.fit(X, y) + try: + model.fit(X, y) + # Catch all error: + except Exception as e: + error_traceback = tb.format_exc() + if "CalledProcessError" in error_traceback: + return ( + empty_df, + "Could not initialize Julia. Error message:\n" + + error_traceback, + ) + else: + return ( + empty_df, + "Failed due to error:\n" + error_traceback, + ) df = model.equations_[["equation", "loss", "complexity"]] # Convert all columns to string type: From 71ed39768a4a0e4f4c035b7629b8fb231687cdc8 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:08:59 -0400 Subject: [PATCH 020/136] Manually load julia --- gui/app.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/gui/app.py b/gui/app.py index d658367ec..658de9cd6 100644 --- a/gui/app.py +++ b/gui/app.py @@ -6,6 +6,13 @@ import pandas as pd import traceback as tb +empty_df = pd.DataFrame( + { + "equation": [], + "loss": [], + "complexity": [], + } +) def greet( file_obj: tempfile._TemporaryFileWrapper, @@ -14,13 +21,6 @@ def greet( binary_operators: list, unary_operators: list, ): - empty_df = pd.DataFrame( - { - "equation": [], - "loss": [], - "complexity": [], - } - ) if col_to_fit == "": return ( empty_df, @@ -44,6 +44,21 @@ def greet( python -c 'import pysr; pysr.install()' fi""" ) + + import pysr + try: + from julia.api import JuliaInfo + info = JuliaInfo.load(julia="/usr/bin/julia") + from julia import Main as _Main + pysr.sr.Main = _Main + except Exception as e: + error_message = tb.format_exc() + return ( + empty_df, + error_message, + ) + + from pysr import PySRRegressor df = pd.read_csv(file_obj.name) From 1049889aaee50bda0bdb64f1bfd1c789665cd43d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:18:40 -0400 Subject: [PATCH 021/136] More helpful error message for julia install --- gui/app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gui/app.py b/gui/app.py index 658de9cd6..e47417a45 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,5 +1,6 @@ import io import gradio as gr +import sys import os import tempfile import numpy as np @@ -45,6 +46,13 @@ def greet( fi""" ) + # Check if /usr/bin/julia exists: + if not os.path.isfile("/usr/bin/julia"): + return ( + empty_df, + "Julia is not installed! (assuming '/usr/bin/julia')", + ) + import pysr try: from julia.api import JuliaInfo From 3c09196f759805c6272db340d1b7edf7c92b762b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:30:00 -0400 Subject: [PATCH 022/136] Try to install Julia within app --- gui/app.py | 23 ++++++++++++----------- gui/requirements.txt | 3 --- 2 files changed, 12 insertions(+), 14 deletions(-) delete mode 100644 gui/requirements.txt diff --git a/gui/app.py b/gui/app.py index e47417a45..0c918424b 100644 --- a/gui/app.py +++ b/gui/app.py @@ -38,21 +38,24 @@ def greet( "Please upload a CSV file!", ) niterations = int(niterations) + + # Install Julia: + os.system( + """if [ ! -d "~/julia" ]; then + wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz + tar zxvf julia-1.7.3-linux-x86_64.tar.gz + mkdir ~/julia + mv julia-1.7.3-linux/* ~/julia/ + fi""") + os.environ["PATH"] += ":~/julia/bin/" # Need to install PySR in separate python instance: os.system( - """if [ ! -d "$HOME/.julia/environments/pysr-0.9.3" ] - then + """if [ ! -d "$HOME/.julia/environments/pysr-0.9.3" ]; then + export PATH="$PATH:$HOME/julia/bin/" python -c 'import pysr; pysr.install()' fi""" ) - # Check if /usr/bin/julia exists: - if not os.path.isfile("/usr/bin/julia"): - return ( - empty_df, - "Julia is not installed! (assuming '/usr/bin/julia')", - ) - import pysr try: from julia.api import JuliaInfo @@ -65,8 +68,6 @@ def greet( empty_df, error_message, ) - - from pysr import PySRRegressor df = pd.read_csv(file_obj.name) diff --git a/gui/requirements.txt b/gui/requirements.txt deleted file mode 100644 index 0262f6f9e..000000000 --- a/gui/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pysr==0.9.3 -numpy -pandas \ No newline at end of file From 48482f081bf6a0b0d1d944c2c47494e4647e82ee Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:33:53 -0400 Subject: [PATCH 023/136] Deleted wrong file --- gui/packages.txt | 1 - gui/requirements.txt | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) delete mode 100644 gui/packages.txt create mode 100644 gui/requirements.txt diff --git a/gui/packages.txt b/gui/packages.txt deleted file mode 100644 index 78dd3ada4..000000000 --- a/gui/packages.txt +++ /dev/null @@ -1 +0,0 @@ -julia \ No newline at end of file diff --git a/gui/requirements.txt b/gui/requirements.txt new file mode 100644 index 000000000..0262f6f9e --- /dev/null +++ b/gui/requirements.txt @@ -0,0 +1,3 @@ +pysr==0.9.3 +numpy +pandas \ No newline at end of file From dcd98bec920d8adfdb0d19cdc6555f942153fb78 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:34:23 -0400 Subject: [PATCH 024/136] Fix pathname to Julia install --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 0c918424b..4bec776ab 100644 --- a/gui/app.py +++ b/gui/app.py @@ -45,7 +45,7 @@ def greet( wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz tar zxvf julia-1.7.3-linux-x86_64.tar.gz mkdir ~/julia - mv julia-1.7.3-linux/* ~/julia/ + mv julia-1.7.3/* ~/julia/ fi""") os.environ["PATH"] += ":~/julia/bin/" # Need to install PySR in separate python instance: From e8191c413b27dced70062033020f175b8361d262 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:41:43 -0400 Subject: [PATCH 025/136] Fix issues with not identifying folder --- gui/app.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gui/app.py b/gui/app.py index 4bec776ab..f8cfa6ef9 100644 --- a/gui/app.py +++ b/gui/app.py @@ -41,17 +41,17 @@ def greet( # Install Julia: os.system( - """if [ ! -d "~/julia" ]; then + """if [ ! -d "/home/user/julia" ]; then wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz tar zxvf julia-1.7.3-linux-x86_64.tar.gz - mkdir ~/julia - mv julia-1.7.3/* ~/julia/ + mkdir /home/user/julia + mv julia-1.7.3/* /home/user/julia/ fi""") - os.environ["PATH"] += ":~/julia/bin/" + os.environ["PATH"] += ":/home/user/julia/bin/" # Need to install PySR in separate python instance: os.system( - """if [ ! -d "$HOME/.julia/environments/pysr-0.9.3" ]; then - export PATH="$PATH:$HOME/julia/bin/" + """if [ ! -d "/home/user/.julia/environments/pysr-0.9.3" ]; then + export PATH="$PATH:/home/user/julia/bin/" python -c 'import pysr; pysr.install()' fi""" ) From 0b704756fd7736d9dea975d007552547b55e4a2d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:44:35 -0400 Subject: [PATCH 026/136] Fix path to Julia --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index f8cfa6ef9..0c9296cfd 100644 --- a/gui/app.py +++ b/gui/app.py @@ -59,7 +59,7 @@ def greet( import pysr try: from julia.api import JuliaInfo - info = JuliaInfo.load(julia="/usr/bin/julia") + info = JuliaInfo.load(julia="/home/user/julia/bin/julia") from julia import Main as _Main pysr.sr.Main = _Main except Exception as e: From 02c14cb0d0db4de0d2e7b72ab4a166141a2f91f3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 15:58:06 -0400 Subject: [PATCH 027/136] Protection against segfaults --- gui/app.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gui/app.py b/gui/app.py index 0c9296cfd..752c301bd 100644 --- a/gui/app.py +++ b/gui/app.py @@ -14,6 +14,7 @@ "complexity": [], } ) +Main = None def greet( file_obj: tempfile._TemporaryFileWrapper, @@ -22,6 +23,12 @@ def greet( binary_operators: list, unary_operators: list, ): + global Main + if Main is not None: + return ( + empty_df, + "Refresh the page to run with a different configuration." + ) if col_to_fit == "": return ( empty_df, From ad955c1e0f751d1b722222498030595905d6cdd2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:11:19 -0400 Subject: [PATCH 028/136] Run pysr in secondary instance --- gui/app.py | 80 +++------------------------------------- gui/install_pysr.sh | 12 ++++++ gui/run_pysr_and_save.py | 68 ++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 75 deletions(-) create mode 100644 gui/install_pysr.sh create mode 100644 gui/run_pysr_and_save.py diff --git a/gui/app.py b/gui/app.py index 752c301bd..c8c5c6253 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,11 +1,7 @@ -import io import gradio as gr -import sys import os import tempfile -import numpy as np import pandas as pd -import traceback as tb empty_df = pd.DataFrame( { @@ -14,7 +10,6 @@ "complexity": [], } ) -Main = None def greet( file_obj: tempfile._TemporaryFileWrapper, @@ -23,12 +18,6 @@ def greet( binary_operators: list, unary_operators: list, ): - global Main - if Main is not None: - return ( - empty_df, - "Refresh the page to run with a different configuration." - ) if col_to_fit == "": return ( empty_df, @@ -44,71 +33,12 @@ def greet( empty_df, "Please upload a CSV file!", ) - niterations = int(niterations) - - # Install Julia: - os.system( - """if [ ! -d "/home/user/julia" ]; then - wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz - tar zxvf julia-1.7.3-linux-x86_64.tar.gz - mkdir /home/user/julia - mv julia-1.7.3/* /home/user/julia/ - fi""") - os.environ["PATH"] += ":/home/user/julia/bin/" - # Need to install PySR in separate python instance: - os.system( - """if [ ! -d "/home/user/.julia/environments/pysr-0.9.3" ]; then - export PATH="$PATH:/home/user/julia/bin/" - python -c 'import pysr; pysr.install()' - fi""" - ) - - import pysr - try: - from julia.api import JuliaInfo - info = JuliaInfo.load(julia="/home/user/julia/bin/julia") - from julia import Main as _Main - pysr.sr.Main = _Main - except Exception as e: - error_message = tb.format_exc() - return ( - empty_df, - error_message, - ) - from pysr import PySRRegressor - - df = pd.read_csv(file_obj.name) - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) - - model = PySRRegressor( - update=False, - temp_equation_file=True, - niterations=niterations, - binary_operators=binary_operators, - unary_operators=unary_operators, - ) - try: - model.fit(X, y) - # Catch all error: - except Exception as e: - error_traceback = tb.format_exc() - if "CalledProcessError" in error_traceback: - return ( - empty_df, - "Could not initialize Julia. Error message:\n" - + error_traceback, - ) - else: - return ( - empty_df, - "Failed due to error:\n" + error_traceback, - ) - df = model.equations_[["equation", "loss", "complexity"]] - # Convert all columns to string type: - df = df.astype(str) - return df, "Successful." + os.system("bash install_pysr.sh") + os.system(f"python run_pysr_and_save.py --niterations {niterations} --binary_operators '{binary_operators}' --unary_operators '{unary_operators}' --col_to_fit {col_to_fit} --filename {file_obj.name}") + df = pd.read_csv("pysr_output.csv") + error_log = open("error.log", "r").read() + return df, error_log def main(): diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh new file mode 100644 index 000000000..76c226e6d --- /dev/null +++ b/gui/install_pysr.sh @@ -0,0 +1,12 @@ +import os + +# Install Julia: +if [ ! -f "/home/user/.local/bin/julia" ]; then + bash -ci "$(curl -fsSL https://raw.githubusercontent.com/abelsiqueira/jill/main/jill.sh)" +fi + +# Need to install PySR in separate python instance: +if [ ! -d "/home/user/.julia/environments/pysr-0.9.3" ]; then + export PATH="$PATH:/home/user/julia/bin/" + python -c 'import pysr; pysr.install()' +fi \ No newline at end of file diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py new file mode 100644 index 000000000..24d151b12 --- /dev/null +++ b/gui/run_pysr_and_save.py @@ -0,0 +1,68 @@ +import os +import pandas as pd +import traceback as tb +import numpy as np +from argparse import ArgumentParser + +# Args: +# niterations +# binary_operators +# unary_operators +# col_to_fit + +empty_df = pd.DataFrame( + { + "equation": [], + "loss": [], + "complexity": [], + } +) + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("niterations", type=int) + parser.add_argument("binary_operators", type=str) + parser.add_argument("unary_operators", type=str) + parser.add_argument("col_to_fit", type=str) + parser.add_argument("filename", type=str) + args = parser.parse_args() + niterations = args.niterations + binary_operators = eval(args.binary_operators) + unary_operators = eval(args.unary_operators) + col_to_fit = args.col_to_fit + filename = args.filename + + os.environ["PATH"] += ":/home/user/.local/bin/" + + try: + import pysr + from julia.api import JuliaInfo + info = JuliaInfo.load(julia="/home/user/.local/bin/julia") + from julia import Main as _Main + pysr.sr.Main = _Main + + from pysr import PySRRegressor + + df = pd.read_csv(filename) + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) + + model = PySRRegressor( + update=False, + niterations=niterations, + binary_operators=binary_operators, + unary_operators=unary_operators, + ) + model.fit(X, y) + + df = model.equations_[["equation", "loss", "complexity"]] + # Convert all columns to string type: + df = df.astype(str) + df.to_csv("pysr_output.csv", index=False) + except Exception as e: + error_message = tb.format_exc() + # Dump to file: + empty_df.to_csv("pysr_output.csv", index=False) + with open("error.log", "w") as f: + f.write(error_message) + \ No newline at end of file From 7078b013b20800fe46866bc9118228f591eaae25 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:14:30 -0400 Subject: [PATCH 029/136] Avoid jill.sh to install julia --- gui/install_pysr.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh index 76c226e6d..90cf2303e 100644 --- a/gui/install_pysr.sh +++ b/gui/install_pysr.sh @@ -1,8 +1,11 @@ import os # Install Julia: -if [ ! -f "/home/user/.local/bin/julia" ]; then - bash -ci "$(curl -fsSL https://raw.githubusercontent.com/abelsiqueira/jill/main/jill.sh)" +if [ ! -d "/home/user/julia" ]; then + wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz + tar zxvf julia-1.7.3-linux-x86_64.tar.gz + mkdir /home/user/julia + mv julia-1.7.3/* /home/user/.local/ fi # Need to install PySR in separate python instance: From fd867c55d2329c6a4924156f249a05b418478f68 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:21:01 -0400 Subject: [PATCH 030/136] Back to jill.sh --- gui/install_pysr.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh index 90cf2303e..222d31f25 100644 --- a/gui/install_pysr.sh +++ b/gui/install_pysr.sh @@ -1,11 +1,10 @@ import os # Install Julia: -if [ ! -d "/home/user/julia" ]; then - wget https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.3-linux-x86_64.tar.gz - tar zxvf julia-1.7.3-linux-x86_64.tar.gz - mkdir /home/user/julia - mv julia-1.7.3/* /home/user/.local/ +if [ ! -f "/home/user/.local/bin/julia" ]; then + wget https://raw.githubusercontent.com/abelsiqueira/jill/main/jill.sh + chmod a+x jill.sh + ./jill.sh --version 1.7.3 -y fi # Need to install PySR in separate python instance: From 10688316a90b5ec0c9cacecd2562e871ba08b271 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:23:19 -0400 Subject: [PATCH 031/136] Automatically install PySR at init --- gui/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index c8c5c6253..3c7f16550 100644 --- a/gui/app.py +++ b/gui/app.py @@ -11,6 +11,8 @@ } ) +os.system("bash install_pysr.sh") + def greet( file_obj: tempfile._TemporaryFileWrapper, col_to_fit: str, @@ -34,7 +36,6 @@ def greet( "Please upload a CSV file!", ) - os.system("bash install_pysr.sh") os.system(f"python run_pysr_and_save.py --niterations {niterations} --binary_operators '{binary_operators}' --unary_operators '{unary_operators}' --col_to_fit {col_to_fit} --filename {file_obj.name}") df = pd.read_csv("pysr_output.csv") error_log = open("error.log", "r").read() From a1300adc611ff4241733e7f9a292e4dc71dc9a9b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:34:02 -0400 Subject: [PATCH 032/136] Correct arguments --- gui/run_pysr_and_save.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index 24d151b12..08bc39e6a 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -20,11 +20,11 @@ if __name__ == "__main__": parser = ArgumentParser() - parser.add_argument("niterations", type=int) - parser.add_argument("binary_operators", type=str) - parser.add_argument("unary_operators", type=str) - parser.add_argument("col_to_fit", type=str) - parser.add_argument("filename", type=str) + parser.add_argument("--niterations", type=int) + parser.add_argument("--binary_operators", type=str) + parser.add_argument("--unary_operators", type=str) + parser.add_argument("--col_to_fit", type=str) + parser.add_argument("--filename", type=str) args = parser.parse_args() niterations = args.niterations binary_operators = eval(args.binary_operators) From deeb73ee6b1a72486028440871b9c8a7ab0c33b2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 16:48:09 -0400 Subject: [PATCH 033/136] Prevent quotes being removed from list --- gui/app.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 3c7f16550..5e83c82d8 100644 --- a/gui/app.py +++ b/gui/app.py @@ -13,6 +13,7 @@ os.system("bash install_pysr.sh") + def greet( file_obj: tempfile._TemporaryFileWrapper, col_to_fit: str, @@ -36,7 +37,16 @@ def greet( "Please upload a CSV file!", ) - os.system(f"python run_pysr_and_save.py --niterations {niterations} --binary_operators '{binary_operators}' --unary_operators '{unary_operators}' --col_to_fit {col_to_fit} --filename {file_obj.name}") + binary_operators = str(binary_operators).replace("'", '"') + unary_operators = str(unary_operators).replace("'", '"') + os.system( + f"python run_pysr_and_save.py " + f"--niterations {niterations} " + f"--binary_operators '{binary_operators}' " + f"--unary_operators '{unary_operators}' " + f"--col_to_fit {col_to_fit} " + f"--filename {file_obj.name}" + ) df = pd.read_csv("pysr_output.csv") error_log = open("error.log", "r").read() return df, error_log From c56b8863eeb91c08bcc1e7d379ed14c5f2453b70 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:19:59 -0400 Subject: [PATCH 034/136] Fix error with not printing to error log --- gui/run_pysr_and_save.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index 08bc39e6a..eca1efe35 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -58,11 +58,13 @@ df = model.equations_[["equation", "loss", "complexity"]] # Convert all columns to string type: df = df.astype(str) - df.to_csv("pysr_output.csv", index=False) + error_message = "Success!" except Exception as e: error_message = tb.format_exc() # Dump to file: - empty_df.to_csv("pysr_output.csv", index=False) - with open("error.log", "w") as f: - f.write(error_message) + df = empty_df + + df.to_csv("pysr_output.csv", index=False) + with open("error.log", "w") as f: + f.write(error_message) \ No newline at end of file From fadaa8d2a58fccaca14f044666baf5dd7bcfeb90 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:31:23 -0400 Subject: [PATCH 035/136] More warnings before running --- gui/app.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gui/app.py b/gui/app.py index 5e83c82d8..af9d6bcbb 100644 --- a/gui/app.py +++ b/gui/app.py @@ -20,6 +20,7 @@ def greet( niterations: int, binary_operators: list, unary_operators: list, + force_run: bool, ): if col_to_fit == "": return ( @@ -36,6 +37,31 @@ def greet( empty_df, "Please upload a CSV file!", ) + # Look at some statistics of the file: + df = pd.read_csv(file_obj.name) + if len(df) == 0: + return ( + empty_df, + "The file is empty!", + ) + if len(df.columns) == 1: + return ( + empty_df, + "The file has only one column!", + ) + if col_to_fit not in df.columns: + return ( + empty_df, + "The column to predict is not in the file!", + ) + if len(df) > 1000 and not force_run: + return ( + empty_df, + "You have uploaded a file with more than 2000 rows. " + "This will take very long to run. " + "Please upload a subsample of the data, " + "or check the box 'Ignore Warnings'." + ) binary_operators = str(binary_operators).replace("'", '"') unary_operators = str(unary_operators).replace("'", '"') @@ -75,6 +101,10 @@ def main(): label="Unary Operators", default=[], ), + gr.inputs.Checkbox( + default=False, + label="Ignore Warnings", + ) ], outputs=[ "dataframe", From 48aee58771ab9b82ad82888aa5fec1162f369116 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:31:39 -0400 Subject: [PATCH 036/136] Don't print progress when running --- gui/run_pysr_and_save.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index eca1efe35..73637c384 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -49,6 +49,7 @@ model = PySRRegressor( update=False, + progress=False, niterations=niterations, binary_operators=binary_operators, unary_operators=unary_operators, From 458f2f73e67bdeec144999acb2eb0201b7d10a28 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:43:56 -0400 Subject: [PATCH 037/136] Add more unary operators --- gui/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index af9d6bcbb..161b68144 100644 --- a/gui/app.py +++ b/gui/app.py @@ -97,7 +97,8 @@ def main(): default=["+", "-", "*", "/"], ), gr.inputs.CheckboxGroup( - choices=["sin", "cos", "exp", "log"], + choices=["sin", "cos", "exp", "log", "square", "cube", + "sqrt", "abs", "tan"], label="Unary Operators", default=[], ), From bd3106ea9bb7b04f4ec49b1cdbda26c494fbefba Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:53:57 -0400 Subject: [PATCH 038/136] Extended app description --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 161b68144..101691043 100644 --- a/gui/app.py +++ b/gui/app.py @@ -81,7 +81,7 @@ def greet( def main(): demo = gr.Interface( fn=greet, - description="PySR Demo", + description="Symbolic Regression with PySR. Watch search progress by clicking 'See logs'!", inputs=[ gr.inputs.File(label="Upload a CSV File"), gr.inputs.Textbox(label="Column to Predict", placeholder="y"), From a704d53f5f5452687081c7b541e840fd4a31b10e Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:54:33 -0400 Subject: [PATCH 039/136] Output parameters for local runs --- gui/run_pysr_and_save.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index 73637c384..a1c3c891e 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -37,8 +37,10 @@ try: import pysr from julia.api import JuliaInfo + info = JuliaInfo.load(julia="/home/user/.local/bin/julia") from julia import Main as _Main + pysr.sr.Main = _Main from pysr import PySRRegressor @@ -59,7 +61,11 @@ df = model.equations_[["equation", "loss", "complexity"]] # Convert all columns to string type: df = df.astype(str) - error_message = "Success!" + error_message = ( + "Success!\n" + f"You may run the model locally (faster) with " + f"the following parameters:\n" + str(model.get_params()) + ) except Exception as e: error_message = tb.format_exc() # Dump to file: @@ -68,4 +74,3 @@ df.to_csv("pysr_output.csv", index=False) with open("error.log", "w") as f: f.write(error_message) - \ No newline at end of file From 6644c43e63342439a1daf836b33966643b5201fd Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 17:56:58 -0400 Subject: [PATCH 040/136] Enable adjustable maxsize --- gui/app.py | 10 +++++++++- gui/run_pysr_and_save.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 101691043..770292eb5 100644 --- a/gui/app.py +++ b/gui/app.py @@ -16,6 +16,7 @@ def greet( file_obj: tempfile._TemporaryFileWrapper, + maxsize: int, col_to_fit: str, niterations: int, binary_operators: list, @@ -68,6 +69,7 @@ def greet( os.system( f"python run_pysr_and_save.py " f"--niterations {niterations} " + f"--maxsize {maxsize} " f"--binary_operators '{binary_operators}' " f"--unary_operators '{unary_operators}' " f"--col_to_fit {col_to_fit} " @@ -89,7 +91,13 @@ def main(): minimum=1, maximum=1000, default=40, - label="Number of iterations", + label="Number of Iterations", + ), + gr.inputs.Slider( + minimum=7, + maximum=35, + default=20, + label="Maximum Complexity" ), gr.inputs.CheckboxGroup( choices=["+", "-", "*", "/", "^"], diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index a1c3c891e..899e90476 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -21,6 +21,7 @@ if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--niterations", type=int) + parser.add_argument("--maxsize", type=int) parser.add_argument("--binary_operators", type=str) parser.add_argument("--unary_operators", type=str) parser.add_argument("--col_to_fit", type=str) @@ -31,6 +32,7 @@ unary_operators = eval(args.unary_operators) col_to_fit = args.col_to_fit filename = args.filename + maxsize = args.maxsize os.environ["PATH"] += ":/home/user/.local/bin/" @@ -52,6 +54,7 @@ model = PySRRegressor( update=False, progress=False, + maxsize=maxsize, niterations=niterations, binary_operators=binary_operators, unary_operators=unary_operators, From ea8feced16b05b9e6a92d36a16c74f7636cfc3b0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 18:03:02 -0400 Subject: [PATCH 041/136] Fix slider to be int steps --- gui/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 770292eb5..c69e71dad 100644 --- a/gui/app.py +++ b/gui/app.py @@ -92,12 +92,14 @@ def main(): maximum=1000, default=40, label="Number of Iterations", + step=1, ), gr.inputs.Slider( minimum=7, maximum=35, default=20, - label="Maximum Complexity" + label="Maximum Complexity", + step=1, ), gr.inputs.CheckboxGroup( choices=["+", "-", "*", "/", "^"], From 59a445b24b0d313f43295edffd128d42ea50e272 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 18:08:00 -0400 Subject: [PATCH 042/136] Fix issue with col not found --- gui/app.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/gui/app.py b/gui/app.py index c69e71dad..ee3a709c8 100644 --- a/gui/app.py +++ b/gui/app.py @@ -50,11 +50,6 @@ def greet( empty_df, "The file has only one column!", ) - if col_to_fit not in df.columns: - return ( - empty_df, - "The column to predict is not in the file!", - ) if len(df) > 1000 and not force_run: return ( empty_df, From efb57c1b1ce61a89385a8d538ca227afc4c9350b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 18:13:22 -0400 Subject: [PATCH 043/136] Fix order of maxsize parameter --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index ee3a709c8..1ffdaa2e8 100644 --- a/gui/app.py +++ b/gui/app.py @@ -16,9 +16,9 @@ def greet( file_obj: tempfile._TemporaryFileWrapper, - maxsize: int, col_to_fit: str, niterations: int, + maxsize: int, binary_operators: list, unary_operators: list, force_run: bool, From 3dc13509e3675d4249d8f6c08fc8193b2a6ed44d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 18:17:22 -0400 Subject: [PATCH 044/136] More helpful error message with missing columns --- gui/app.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/gui/app.py b/gui/app.py index 1ffdaa2e8..a5a402b9c 100644 --- a/gui/app.py +++ b/gui/app.py @@ -50,13 +50,19 @@ def greet( empty_df, "The file has only one column!", ) + if col_to_fit not in df.columns: + return ( + empty_df, + f"The column to predict, {col_to_fit}, is not in the file!" + f"I found {df.columns}.", + ) if len(df) > 1000 and not force_run: return ( empty_df, "You have uploaded a file with more than 2000 rows. " "This will take very long to run. " "Please upload a subsample of the data, " - "or check the box 'Ignore Warnings'." + "or check the box 'Ignore Warnings'.", ) binary_operators = str(binary_operators).replace("'", '"') @@ -102,15 +108,24 @@ def main(): default=["+", "-", "*", "/"], ), gr.inputs.CheckboxGroup( - choices=["sin", "cos", "exp", "log", "square", "cube", - "sqrt", "abs", "tan"], + choices=[ + "sin", + "cos", + "exp", + "log", + "square", + "cube", + "sqrt", + "abs", + "tan", + ], label="Unary Operators", default=[], ), gr.inputs.Checkbox( default=False, label="Ignore Warnings", - ) + ), ], outputs=[ "dataframe", From 81b651bde4074fcd747152d28101bb11f3f4bfb1 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 22 Jun 2022 18:26:26 -0400 Subject: [PATCH 045/136] Better description for running locally --- gui/run_pysr_and_save.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index 899e90476..15841c046 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -67,8 +67,15 @@ error_message = ( "Success!\n" f"You may run the model locally (faster) with " - f"the following parameters:\n" + str(model.get_params()) - ) + f"the following parameters:" + +f""" +model = PySRRegressor( + niterations={niterations}, + binary_operators={str(binary_operators)}, + unary_operators={str(unary_operators)}, + maxsize={maxsize}, +) +model.fit(X, y)""") except Exception as e: error_message = tb.format_exc() # Dump to file: From c3d41aaaa11d999d4238b29955174eedc2a2e7e3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 16 Nov 2022 14:48:59 -0500 Subject: [PATCH 046/136] Update PySR version --- gui/install_pysr.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh index 222d31f25..9ddbb39d3 100644 --- a/gui/install_pysr.sh +++ b/gui/install_pysr.sh @@ -4,11 +4,11 @@ import os if [ ! -f "/home/user/.local/bin/julia" ]; then wget https://raw.githubusercontent.com/abelsiqueira/jill/main/jill.sh chmod a+x jill.sh - ./jill.sh --version 1.7.3 -y + ./jill.sh --version 1.8.2 -y fi # Need to install PySR in separate python instance: -if [ ! -d "/home/user/.julia/environments/pysr-0.9.3" ]; then +if [ ! -d "/home/user/.julia/environments/pysr-0.11.9" ]; then export PATH="$PATH:/home/user/julia/bin/" python -c 'import pysr; pysr.install()' fi \ No newline at end of file From 5a4bba91f25d889736d8eeccef9c522ea9e121d0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 16 Nov 2022 14:52:06 -0500 Subject: [PATCH 047/136] Update requirements.txt --- gui/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/requirements.txt b/gui/requirements.txt index 0262f6f9e..34832856c 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,3 +1,3 @@ -pysr==0.9.3 +pysr==0.11.9 numpy pandas \ No newline at end of file From ee0ed3e4eff8b0a0abbdc33f714ff8d5754c01e7 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 16 Nov 2022 14:55:46 -0500 Subject: [PATCH 048/136] Fix path to Julia --- gui/install_pysr.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh index 9ddbb39d3..3885cfc03 100644 --- a/gui/install_pysr.sh +++ b/gui/install_pysr.sh @@ -9,6 +9,6 @@ fi # Need to install PySR in separate python instance: if [ ! -d "/home/user/.julia/environments/pysr-0.11.9" ]; then - export PATH="$PATH:/home/user/julia/bin/" + export PATH="$HOME/.local/bin:$PATH" python -c 'import pysr; pysr.install()' fi \ No newline at end of file From 65baea3e36a0f2f1c53456b53f592f79bee19e73 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 02:32:08 +0000 Subject: [PATCH 049/136] Update to PySR 0.18.1 --- gui/app.py | 2 -- gui/install_pysr.sh | 14 ------------ gui/requirements.txt | 2 +- gui/run_pysr_and_save.py | 49 ++++++++++++++-------------------------- 4 files changed, 18 insertions(+), 49 deletions(-) delete mode 100644 gui/install_pysr.sh diff --git a/gui/app.py b/gui/app.py index a5a402b9c..bcd89cca7 100644 --- a/gui/app.py +++ b/gui/app.py @@ -11,8 +11,6 @@ } ) -os.system("bash install_pysr.sh") - def greet( file_obj: tempfile._TemporaryFileWrapper, diff --git a/gui/install_pysr.sh b/gui/install_pysr.sh deleted file mode 100644 index 3885cfc03..000000000 --- a/gui/install_pysr.sh +++ /dev/null @@ -1,14 +0,0 @@ -import os - -# Install Julia: -if [ ! -f "/home/user/.local/bin/julia" ]; then - wget https://raw.githubusercontent.com/abelsiqueira/jill/main/jill.sh - chmod a+x jill.sh - ./jill.sh --version 1.8.2 -y -fi - -# Need to install PySR in separate python instance: -if [ ! -d "/home/user/.julia/environments/pysr-0.11.9" ]; then - export PATH="$HOME/.local/bin:$PATH" - python -c 'import pysr; pysr.install()' -fi \ No newline at end of file diff --git a/gui/requirements.txt b/gui/requirements.txt index 34832856c..3259413bc 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,3 +1,3 @@ -pysr==0.11.9 +pysr==0.18.1 numpy pandas \ No newline at end of file diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py index 15841c046..de843ae8b 100644 --- a/gui/run_pysr_and_save.py +++ b/gui/run_pysr_and_save.py @@ -2,6 +2,7 @@ import pandas as pd import traceback as tb import numpy as np +from pysr import PySRRegressor from argparse import ArgumentParser # Args: @@ -34,37 +35,25 @@ filename = args.filename maxsize = args.maxsize - os.environ["PATH"] += ":/home/user/.local/bin/" - try: - import pysr - from julia.api import JuliaInfo + df = pd.read_csv(filename) + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) - info = JuliaInfo.load(julia="/home/user/.local/bin/julia") - from julia import Main as _Main + model = PySRRegressor( + progress=False, + verbosity=0, + maxsize=maxsize, + niterations=niterations, + binary_operators=binary_operators, + unary_operators=unary_operators, + ) + model.fit(X, y) - pysr.sr.Main = _Main - - from pysr import PySRRegressor - - df = pd.read_csv(filename) - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) - - model = PySRRegressor( - update=False, - progress=False, - maxsize=maxsize, - niterations=niterations, - binary_operators=binary_operators, - unary_operators=unary_operators, - ) - model.fit(X, y) - - df = model.equations_[["equation", "loss", "complexity"]] - # Convert all columns to string type: - df = df.astype(str) - error_message = ( + df = model.equations_[["equation", "loss", "complexity"]] + # Convert all columns to string type: + df = df.astype(str) + error_message = ( "Success!\n" f"You may run the model locally (faster) with " f"the following parameters:" @@ -76,10 +65,6 @@ maxsize={maxsize}, ) model.fit(X, y)""") - except Exception as e: - error_message = tb.format_exc() - # Dump to file: - df = empty_df df.to_csv("pysr_output.csv", index=False) with open("error.log", "w") as f: From 13219e68404e7f84d073b4c55a06a0c57721a21f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 02:47:32 +0000 Subject: [PATCH 050/136] Move everything to app.py --- gui/app.py | 49 +++++++++++++++++++-------- gui/run_pysr_and_save.py | 71 ---------------------------------------- 2 files changed, 36 insertions(+), 84 deletions(-) delete mode 100644 gui/run_pysr_and_save.py diff --git a/gui/app.py b/gui/app.py index bcd89cca7..cf3bf7831 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,7 +1,10 @@ import gradio as gr +import numpy as np import os -import tempfile import pandas as pd +import pysr +import tempfile +from typing import Optional empty_df = pd.DataFrame( { @@ -13,7 +16,7 @@ def greet( - file_obj: tempfile._TemporaryFileWrapper, + file_obj: Optional[tempfile._TemporaryFileWrapper], col_to_fit: str, niterations: int, maxsize: int, @@ -65,19 +68,39 @@ def greet( binary_operators = str(binary_operators).replace("'", '"') unary_operators = str(unary_operators).replace("'", '"') - os.system( - f"python run_pysr_and_save.py " - f"--niterations {niterations} " - f"--maxsize {maxsize} " - f"--binary_operators '{binary_operators}' " - f"--unary_operators '{unary_operators}' " - f"--col_to_fit {col_to_fit} " - f"--filename {file_obj.name}" + + df = pd.read_csv(file_obj) + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) + + model = pysr.PySRRegressor( + progress=False, + verbosity=0, + maxsize=maxsize, + niterations=niterations, + binary_operators=binary_operators, + unary_operators=unary_operators, ) - df = pd.read_csv("pysr_output.csv") - error_log = open("error.log", "r").read() - return df, error_log + model.fit(X, y) + + df = model.equations_[["equation", "loss", "complexity"]] + # Convert all columns to string type: + df = df.astype(str) + msg = ( + "Success!\n" + f"You may run the model locally (faster) with " + f"the following parameters:" + +f""" +model = PySRRegressor( + niterations={niterations}, + binary_operators={str(binary_operators)}, + unary_operators={str(unary_operators)}, + maxsize={maxsize}, +) +model.fit(X, y)""") + df.to_csv("pysr_output.csv", index=False) + return df, msg def main(): demo = gr.Interface( diff --git a/gui/run_pysr_and_save.py b/gui/run_pysr_and_save.py deleted file mode 100644 index de843ae8b..000000000 --- a/gui/run_pysr_and_save.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import pandas as pd -import traceback as tb -import numpy as np -from pysr import PySRRegressor -from argparse import ArgumentParser - -# Args: -# niterations -# binary_operators -# unary_operators -# col_to_fit - -empty_df = pd.DataFrame( - { - "equation": [], - "loss": [], - "complexity": [], - } -) - -if __name__ == "__main__": - parser = ArgumentParser() - parser.add_argument("--niterations", type=int) - parser.add_argument("--maxsize", type=int) - parser.add_argument("--binary_operators", type=str) - parser.add_argument("--unary_operators", type=str) - parser.add_argument("--col_to_fit", type=str) - parser.add_argument("--filename", type=str) - args = parser.parse_args() - niterations = args.niterations - binary_operators = eval(args.binary_operators) - unary_operators = eval(args.unary_operators) - col_to_fit = args.col_to_fit - filename = args.filename - maxsize = args.maxsize - - - df = pd.read_csv(filename) - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) - - model = PySRRegressor( - progress=False, - verbosity=0, - maxsize=maxsize, - niterations=niterations, - binary_operators=binary_operators, - unary_operators=unary_operators, - ) - model.fit(X, y) - - df = model.equations_[["equation", "loss", "complexity"]] - # Convert all columns to string type: - df = df.astype(str) - error_message = ( - "Success!\n" - f"You may run the model locally (faster) with " - f"the following parameters:" - +f""" -model = PySRRegressor( - niterations={niterations}, - binary_operators={str(binary_operators)}, - unary_operators={str(unary_operators)}, - maxsize={maxsize}, -) -model.fit(X, y)""") - - df.to_csv("pysr_output.csv", index=False) - with open("error.log", "w") as f: - f.write(error_message) From 8ec0807239ec23b3d9d8c3c12618733790421d08 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 02:52:32 +0000 Subject: [PATCH 051/136] Add custom docker file --- gui/Dockerfile | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 gui/Dockerfile diff --git a/gui/Dockerfile b/gui/Dockerfile new file mode 100644 index 000000000..ed0cb90a7 --- /dev/null +++ b/gui/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.12 + +WORKDIR /code + +COPY ./requirements.txt /code/requirements.txt + +# Install Python dependencies: +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt + +# Install Julia: +RUN python -c "import pysr" + +# Set up a new user named "user" with user ID 1000 +RUN useradd -m -u 1000 user +USER user +ENV HOME=/home/user +ENV PATH=/home/user/.local/bin:$PATH + +WORKDIR $HOME/app + +COPY --chown=user . $HOME/app + +CMD ["python", "app.py"] From 126dce4cb140f9c95cc8046961da79b1799c3d35 Mon Sep 17 00:00:00 2001 From: Miles Cranmer Date: Thu, 28 Mar 2024 02:55:34 +0000 Subject: [PATCH 052/136] initial commit --- gui/.gitattributes | 27 --------------------------- gui/README.md | 12 ------------ 2 files changed, 39 deletions(-) delete mode 100644 gui/.gitattributes delete mode 100644 gui/README.md diff --git a/gui/.gitattributes b/gui/.gitattributes deleted file mode 100644 index ac481c8eb..000000000 --- a/gui/.gitattributes +++ /dev/null @@ -1,27 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zstandard filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/gui/README.md b/gui/README.md deleted file mode 100644 index affe03b50..000000000 --- a/gui/README.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: PySR -emoji: 🌍 -colorFrom: green -colorTo: indigo -sdk: gradio -sdk_version: 3.0.15 -app_file: app.py -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference From 27373bb2961458140f93fc152bd335919d2fab5a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:00:39 +0000 Subject: [PATCH 053/136] Add application file --- gui/Dockerfile | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index ed0cb90a7..be4f806e5 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12 +FROM python:3.9 WORKDIR /code @@ -10,14 +10,6 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia: RUN python -c "import pysr" -# Set up a new user named "user" with user ID 1000 -RUN useradd -m -u 1000 user -USER user -ENV HOME=/home/user -ENV PATH=/home/user/.local/bin:$PATH - -WORKDIR $HOME/app - -COPY --chown=user . $HOME/app +COPY . . CMD ["python", "app.py"] From 5fed067ae6ff8c185b9bea6309ced7907c88691b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:04:36 +0000 Subject: [PATCH 054/136] Try to fix gradio app --- gui/Dockerfile | 16 ++++++++++++++++ gui/app.py | 1 - gui/requirements.txt | 3 ++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index be4f806e5..353d15a5f 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -1,5 +1,19 @@ FROM python:3.9 +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libpython3-dev \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + WORKDIR /code COPY ./requirements.txt /code/requirements.txt @@ -12,4 +26,6 @@ RUN python -c "import pysr" COPY . . +EXPOSE 7860 + CMD ["python", "app.py"] diff --git a/gui/app.py b/gui/app.py index cf3bf7831..72d6b1e60 100644 --- a/gui/app.py +++ b/gui/app.py @@ -75,7 +75,6 @@ def greet( model = pysr.PySRRegressor( progress=False, - verbosity=0, maxsize=maxsize, niterations=niterations, binary_operators=binary_operators, diff --git a/gui/requirements.txt b/gui/requirements.txt index 3259413bc..362c04afd 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,3 +1,4 @@ pysr==0.18.1 numpy -pandas \ No newline at end of file +pandas +gradio \ No newline at end of file From 24ea2190203d734d3c9c1dcbc3d0bb96c8c255b6 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:09:29 +0000 Subject: [PATCH 055/136] Install juliaup --- gui/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index 353d15a5f..4753bf093 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -22,6 +22,8 @@ COPY ./requirements.txt /code/requirements.txt RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia: +RUN curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel=1.10.0 +# Install Julia dependencies RUN python -c "import pysr" COPY . . From a0ac11394ed16f26a966ed2adbe546f0125fe114 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:10:49 +0000 Subject: [PATCH 056/136] Link julia to local bin --- gui/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index 4753bf093..f1a7e7c5a 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -23,6 +23,7 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia: RUN curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel=1.10.0 +RUN ln -s /root/.julia/bin/julia /usr/local/bin/julia # Install Julia dependencies RUN python -c "import pysr" From 67597d0787d60227fab9481dcd226b1d6aa183c2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:14:02 +0000 Subject: [PATCH 057/136] Fix path issue --- gui/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index f1a7e7c5a..151c592f4 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -23,7 +23,7 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia: RUN curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel=1.10.0 -RUN ln -s /root/.julia/bin/julia /usr/local/bin/julia +ENV PYTHON_JULIACALL_BINDIR="/root/.julia/bin" # Install Julia dependencies RUN python -c "import pysr" From c9ef87bad26218d22a982d28abd6c362d35a5531 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:19:56 +0000 Subject: [PATCH 058/136] Copy from julia base container instead --- gui/Dockerfile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 151c592f4..23eaffb62 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -1,7 +1,8 @@ +FROM julia:1.10.0 AS jl FROM python:3.9 -ENV PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 +COPY --from=jl /usr/local/julia /usr/local/julia +ENV PATH="/usr/local/julia/bin:${PATH}" RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -21,9 +22,6 @@ COPY ./requirements.txt /code/requirements.txt # Install Python dependencies: RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt -# Install Julia: -RUN curl -fsSL https://install.julialang.org | sh -s -- -y --default-channel=1.10.0 -ENV PYTHON_JULIACALL_BINDIR="/root/.julia/bin" # Install Julia dependencies RUN python -c "import pysr" From 375d20ad8356eccc4f21fab6019895f31bdd5e3e Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:20:09 +0000 Subject: [PATCH 059/136] Bump Python to 3.12 --- gui/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 23eaffb62..9dcbfcb89 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -1,5 +1,5 @@ FROM julia:1.10.0 AS jl -FROM python:3.9 +FROM python:3.12 COPY --from=jl /usr/local/julia /usr/local/julia ENV PATH="/usr/local/julia/bin:${PATH}" From b31a378afb768162de70cfe75080ad7263f07d71 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:24:00 +0000 Subject: [PATCH 060/136] Fix python runner --- gui/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 9dcbfcb89..844bb7d3b 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -29,4 +29,4 @@ COPY . . EXPOSE 7860 -CMD ["python", "app.py"] +CMD ["python", "-X", "juliacall-home=/usr/local/julia/bin", "app.py"] From 4b720d99565a6da5d12386f5dd977632656f5b7a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 03:33:21 +0000 Subject: [PATCH 061/136] Again try to fix --- gui/Dockerfile | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 844bb7d3b..0d7f2319a 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -19,14 +19,24 @@ WORKDIR /code COPY ./requirements.txt /code/requirements.txt -# Install Python dependencies: -RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt +# Set up a new user named "user" with user ID 1000 +RUN useradd -m -u 1000 user +# Switch to the "user" user +USER user +# Set home to the user's home directory +ENV HOME=/home/user +ENV PATH=/home/user/.local/bin:$PATH + +# Install Python dependencies in a virtual environment +RUN python -m venv /home/user/.venv +RUN source /home/user/.venv/bin/activate && pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia dependencies -RUN python -c "import pysr" +RUN source /home/user/.venv/bin/activate && python -c "import pysr" -COPY . . +WORKDIR $HOME/app +COPY --chown=user . $HOME/app EXPOSE 7860 -CMD ["python", "-X", "juliacall-home=/usr/local/julia/bin", "app.py"] +CMD ["/bin/bash", "-c", "source /home/user/.venv/bin/activate && python /home/user/app/app.py"] From d50f3f0c5fd1482b7255267e7d4d2749d43309ad Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:39:30 +0000 Subject: [PATCH 062/136] Up dockerfile --- gui/Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 0d7f2319a..25ee1d322 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -28,15 +28,14 @@ ENV HOME=/home/user ENV PATH=/home/user/.local/bin:$PATH # Install Python dependencies in a virtual environment -RUN python -m venv /home/user/.venv -RUN source /home/user/.venv/bin/activate && pip install --no-cache-dir --upgrade -r /code/requirements.txt +RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia dependencies -RUN source /home/user/.venv/bin/activate && python -c "import pysr" +RUN source /home/user/.venv/bin/python -c "import pysr" WORKDIR $HOME/app COPY --chown=user . $HOME/app EXPOSE 7860 -CMD ["/bin/bash", "-c", "source /home/user/.venv/bin/activate && python /home/user/app/app.py"] +CMD ["/bin/bash", "-l", "-c", "/home/user/.venv/bin/python /home/user/app/app.py"] From 5ed4b1151385acde9095d84678786e76d8313dce Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:50:43 +0000 Subject: [PATCH 063/136] Blank --- gui/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index 25ee1d322..85ad73b2a 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -27,6 +27,7 @@ USER user ENV HOME=/home/user ENV PATH=/home/user/.local/bin:$PATH + # Install Python dependencies in a virtual environment RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt From f6dc42355a9415145008f1920af18d338ad5a73e Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:51:50 +0000 Subject: [PATCH 064/136] Up dockerfile --- gui/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index 85ad73b2a..5a63bf280 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -27,6 +27,7 @@ USER user ENV HOME=/home/user ENV PATH=/home/user/.local/bin:$PATH +RUN python -m venv /home/user/.venv # Install Python dependencies in a virtual environment RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt From 91ce4dddee2251c43a7ac1a536ddc90554a89ef6 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:55:21 +0000 Subject: [PATCH 065/136] Fix --- gui/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 5a63bf280..ad6f70258 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -33,7 +33,7 @@ RUN python -m venv /home/user/.venv RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt # Install Julia dependencies -RUN source /home/user/.venv/bin/python -c "import pysr" +RUN /home/user/.venv/bin/python -c "import pysr" WORKDIR $HOME/app COPY --chown=user . $HOME/app From 801ce9cbfcaddfdd8280228a2be8c1b9bf91fa02 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:57:08 +0000 Subject: [PATCH 066/136] Fix length limit --- gui/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index 72d6b1e60..2101a73bf 100644 --- a/gui/app.py +++ b/gui/app.py @@ -57,10 +57,10 @@ def greet( f"The column to predict, {col_to_fit}, is not in the file!" f"I found {df.columns}.", ) - if len(df) > 1000 and not force_run: + if len(df) > 10_000 and not force_run: return ( empty_df, - "You have uploaded a file with more than 2000 rows. " + "You have uploaded a file with more than 10,000 rows. " "This will take very long to run. " "Please upload a subsample of the data, " "or check the box 'Ignore Warnings'.", From a9e19e6f2c259a4d1976c5eafa6dbe172d0462c7 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 15:59:15 +0000 Subject: [PATCH 067/136] Install Bumper as well --- gui/Dockerfile | 5 +++-- gui/app.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index ad6f70258..46e5febba 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -32,8 +32,9 @@ RUN python -m venv /home/user/.venv # Install Python dependencies in a virtual environment RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt -# Install Julia dependencies -RUN /home/user/.venv/bin/python -c "import pysr" +# Install and pre-compile Julia dependencies, +# including the Bumper extension +RUN /home/user/.venv/bin/python -c "import pysr; pysr.PySRRegressor(bumper=True, verbosity=0, progress=False, max_evals=1).fit([[1]], [1])" WORKDIR $HOME/app COPY --chown=user . $HOME/app diff --git a/gui/app.py b/gui/app.py index 2101a73bf..08f342fc7 100644 --- a/gui/app.py +++ b/gui/app.py @@ -75,6 +75,7 @@ def greet( model = pysr.PySRRegressor( progress=False, + bumper=True, maxsize=maxsize, niterations=niterations, binary_operators=binary_operators, From 27d64c8ca32617e4bff3e957732650c1a6b8beb9 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:25:51 +0000 Subject: [PATCH 068/136] Fix up docker --- gui/Dockerfile | 4 +++- gui/app.py | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 46e5febba..faea90f6a 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -23,6 +23,7 @@ COPY ./requirements.txt /code/requirements.txt RUN useradd -m -u 1000 user # Switch to the "user" user USER user +WORKDIR /home/user/ # Set home to the user's home directory ENV HOME=/home/user ENV PATH=/home/user/.local/bin:$PATH @@ -34,9 +35,10 @@ RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code # Install and pre-compile Julia dependencies, # including the Bumper extension +RUN /home/user/.venv/bin/python -c "import pysr" RUN /home/user/.venv/bin/python -c "import pysr; pysr.PySRRegressor(bumper=True, verbosity=0, progress=False, max_evals=1).fit([[1]], [1])" -WORKDIR $HOME/app +WORKDIR /home/user/app COPY --chown=user . $HOME/app EXPOSE 7860 diff --git a/gui/app.py b/gui/app.py index 08f342fc7..efe718648 100644 --- a/gui/app.py +++ b/gui/app.py @@ -107,28 +107,28 @@ def main(): fn=greet, description="Symbolic Regression with PySR. Watch search progress by clicking 'See logs'!", inputs=[ - gr.inputs.File(label="Upload a CSV File"), - gr.inputs.Textbox(label="Column to Predict", placeholder="y"), - gr.inputs.Slider( + gr.File(label="Upload a CSV File"), + gr.Textbox(label="Column to Predict", placeholder="y"), + gr.Slider( minimum=1, maximum=1000, - default=40, + value=40, label="Number of Iterations", step=1, ), - gr.inputs.Slider( + gr.Slider( minimum=7, maximum=35, - default=20, + value=20, label="Maximum Complexity", step=1, ), - gr.inputs.CheckboxGroup( + gr.CheckboxGroup( choices=["+", "-", "*", "/", "^"], label="Binary Operators", - default=["+", "-", "*", "/"], + value=["+", "-", "*", "/"], ), - gr.inputs.CheckboxGroup( + gr.CheckboxGroup( choices=[ "sin", "cos", @@ -141,16 +141,16 @@ def main(): "tan", ], label="Unary Operators", - default=[], + value=[], ), - gr.inputs.Checkbox( - default=False, + gr.Checkbox( + value=False, label="Ignore Warnings", ), ], outputs=[ "dataframe", - gr.outputs.Textbox(label="Error Log"), + gr.Textbox(label="Error Log"), ], ) # Add file to the demo: From 4de6d3f7075ec9601afd14ce6c8a5585ffd8d8ff Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:31:07 +0000 Subject: [PATCH 069/136] Set correct env variables for binding to HF --- gui/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index faea90f6a..8c25e6932 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -42,5 +42,10 @@ WORKDIR /home/user/app COPY --chown=user . $HOME/app EXPOSE 7860 +ENV GRADIO_ALLOW_FLAGGING=never \ + GRADIO_NUM_PORTS=1 \ + GRADIO_SERVER_NAME=0.0.0.0 \ + GRADIO_THEME=huggingface \ + SYSTEM=spaces CMD ["/bin/bash", "-l", "-c", "/home/user/.venv/bin/python /home/user/app/app.py"] From ce963bc2212248c6c44e3936a0c0b69d7b460d2e Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:37:21 +0000 Subject: [PATCH 070/136] Try to show progress in logs --- gui/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index efe718648..e3086d412 100644 --- a/gui/app.py +++ b/gui/app.py @@ -74,7 +74,6 @@ def greet( X = df.drop([col_to_fit], axis=1) model = pysr.PySRRegressor( - progress=False, bumper=True, maxsize=maxsize, niterations=niterations, @@ -105,7 +104,7 @@ def greet( def main(): demo = gr.Interface( fn=greet, - description="Symbolic Regression with PySR. Watch search progress by clicking 'See logs'!", + description="Symbolic Regression with PySR. Watch search progress by following the logs.", inputs=[ gr.File(label="Upload a CSV File"), gr.Textbox(label="Column to Predict", placeholder="y"), From d39a013c267c7115bd4a665a036cafe65e23a806 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:43:47 +0000 Subject: [PATCH 071/136] Add timeout --- gui/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index e3086d412..dbe131017 100644 --- a/gui/app.py +++ b/gui/app.py @@ -40,7 +40,7 @@ def greet( "Please upload a CSV file!", ) # Look at some statistics of the file: - df = pd.read_csv(file_obj.name) + df = pd.read_csv(file_obj) if len(df) == 0: return ( empty_df, @@ -69,7 +69,6 @@ def greet( binary_operators = str(binary_operators).replace("'", '"') unary_operators = str(unary_operators).replace("'", '"') - df = pd.read_csv(file_obj) y = np.array(df[col_to_fit]) X = df.drop([col_to_fit], axis=1) @@ -79,6 +78,7 @@ def greet( niterations=niterations, binary_operators=binary_operators, unary_operators=unary_operators, + timeout_in_seconds=1000, ) model.fit(X, y) From 08f8ef766e1390c30fdb86992ab0fb2c409fade0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:45:06 +0000 Subject: [PATCH 072/136] Fix operator selection --- gui/app.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/gui/app.py b/gui/app.py index dbe131017..3722fa34f 100644 --- a/gui/app.py +++ b/gui/app.py @@ -66,9 +66,6 @@ def greet( "or check the box 'Ignore Warnings'.", ) - binary_operators = str(binary_operators).replace("'", '"') - unary_operators = str(unary_operators).replace("'", '"') - y = np.array(df[col_to_fit]) X = df.drop([col_to_fit], axis=1) From 88a78a4d997959cc2c43b90730d4ab0ccef94009 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:48:41 +0000 Subject: [PATCH 073/136] Black formatting --- gui/app.py | 12 +++++++----- gui/gen_example_data.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gui/app.py b/gui/app.py index 3722fa34f..752789ec9 100644 --- a/gui/app.py +++ b/gui/app.py @@ -83,21 +83,23 @@ def greet( # Convert all columns to string type: df = df.astype(str) msg = ( - "Success!\n" - f"You may run the model locally (faster) with " - f"the following parameters:" - +f""" + "Success!\n" + f"You may run the model locally (faster) with " + f"the following parameters:" + + f""" model = PySRRegressor( niterations={niterations}, binary_operators={str(binary_operators)}, unary_operators={str(unary_operators)}, maxsize={maxsize}, ) -model.fit(X, y)""") +model.fit(X, y)""" + ) df.to_csv("pysr_output.csv", index=False) return df, msg + def main(): demo = gr.Interface( fn=greet, diff --git a/gui/gen_example_data.py b/gui/gen_example_data.py index 4eef2be26..3b992c875 100644 --- a/gui/gen_example_data.py +++ b/gui/gen_example_data.py @@ -14,4 +14,4 @@ R = 8.3144598 # J/mol/K X["y"] = X["n"] * R * X["T"] / X["P"] -X.to_csv("data.csv", index=False) \ No newline at end of file +X.to_csv("data.csv", index=False) From 73042d9f307570810039abb5988c85eafd964746 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 16:59:32 +0000 Subject: [PATCH 074/136] Add test data generator to app --- gui/app.py | 114 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/gui/app.py b/gui/app.py index 752789ec9..c4ef68f05 100644 --- a/gui/app.py +++ b/gui/app.py @@ -14,60 +14,76 @@ } ) +test_equations = { + "Complex Polynomial": "3*x^3 + 2*x^2 - x + sin(x)", + "Exponential and Logarithmic": "exp(-x) + log(x+1)", + "Trigonometric Polynomial": "sin(x) + cos(2*x) + tan(x/3)", + "Mixed Functions": "sqrt(x)*exp(-x) + cos(pi*x)", + "Rational Function": "(x^2 + 1) / (x - 2)", +} + + +def generate_data(equation: str, num_points: int, noise_level: float): + x = np.linspace(-10, 10, num_points) + s = test_equations[equation] + for (k, v) in { + "sin": "np.sin", + "cos": "np.cos", + "exp": "np.exp", + "log": "np.log", + "tan": "np.tan", + "^": "**", + }.items(): + s = s.replace(k, v) + y = eval(s) + noise = np.random.normal(0, noise_level, y.shape) + y_noisy = y + noise + return pd.DataFrame({"x": x}), y_noisy + def greet( file_obj: Optional[tempfile._TemporaryFileWrapper], - col_to_fit: str, + test_equation: str, + num_points: int, + noise_level: float, niterations: int, maxsize: int, binary_operators: list, unary_operators: list, force_run: bool, ): - if col_to_fit == "": - return ( - empty_df, - "Please enter a column to predict!", - ) - if len(binary_operators) == 0 and len(unary_operators) == 0: - return ( - empty_df, - "Please select at least one operator!", - ) - if file_obj is None: - return ( - empty_df, - "Please upload a CSV file!", - ) - # Look at some statistics of the file: - df = pd.read_csv(file_obj) - if len(df) == 0: - return ( - empty_df, - "The file is empty!", - ) - if len(df.columns) == 1: - return ( - empty_df, - "The file has only one column!", - ) - if col_to_fit not in df.columns: - return ( - empty_df, - f"The column to predict, {col_to_fit}, is not in the file!" - f"I found {df.columns}.", - ) - if len(df) > 10_000 and not force_run: - return ( - empty_df, - "You have uploaded a file with more than 10,000 rows. " - "This will take very long to run. " - "Please upload a subsample of the data, " - "or check the box 'Ignore Warnings'.", - ) + if file_obj is not None: + if len(binary_operators) == 0 and len(unary_operators) == 0: + return ( + empty_df, + "Please select at least one operator!", + ) + # Look at some statistics of the file: + df = pd.read_csv(file_obj) + if len(df) == 0: + return ( + empty_df, + "The file is empty!", + ) + if len(df.columns) == 1: + return ( + empty_df, + "The file has only one column!", + ) + if len(df) > 10_000 and not force_run: + return ( + empty_df, + "You have uploaded a file with more than 10,000 rows. " + "This will take very long to run. " + "Please upload a subsample of the data, " + "or check the box 'Ignore Warnings'.", + ) - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) + col_to_fit = df.columns[-1] + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) + else: + X, y = generate_data(test_equation, num_points, noise_level) model = pysr.PySRRegressor( bumper=True, @@ -106,7 +122,15 @@ def main(): description="Symbolic Regression with PySR. Watch search progress by following the logs.", inputs=[ gr.File(label="Upload a CSV File"), - gr.Textbox(label="Column to Predict", placeholder="y"), + gr.Radio(list(test_equations.keys()), label="Test Equation"), + gr.Slider( + minimum=10, + maximum=1000, + value=100, + label="Number of Data Points", + step=1, + ), + gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level"), gr.Slider( minimum=1, maximum=1000, From 46fdaa6c0bd6a38765838910a51b3a3beaa01d2b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 17:37:10 +0000 Subject: [PATCH 075/136] Automatically plot test data --- gui/app.py | 181 ++++++++++++++++++++++++++----------------- gui/requirements.txt | 2 +- 2 files changed, 112 insertions(+), 71 deletions(-) diff --git a/gui/app.py b/gui/app.py index c4ef68f05..63e92c43f 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,6 +1,5 @@ import gradio as gr import numpy as np -import os import pandas as pd import pysr import tempfile @@ -14,18 +13,13 @@ } ) -test_equations = { - "Complex Polynomial": "3*x^3 + 2*x^2 - x + sin(x)", - "Exponential and Logarithmic": "exp(-x) + log(x+1)", - "Trigonometric Polynomial": "sin(x) + cos(2*x) + tan(x/3)", - "Mixed Functions": "sqrt(x)*exp(-x) + cos(pi*x)", - "Rational Function": "(x^2 + 1) / (x - 2)", -} +test_equations = [ + "sin(x) + cos(2*x) + tan(x/3)", +] -def generate_data(equation: str, num_points: int, noise_level: float): - x = np.linspace(-10, 10, num_points) - s = test_equations[equation] +def generate_data(s: str, num_points: int, noise_level: float): + x = np.linspace(0, 10, num_points) for (k, v) in { "sin": "np.sin", "cos": "np.cos", @@ -117,68 +111,115 @@ def greet( def main(): - demo = gr.Interface( - fn=greet, - description="Symbolic Regression with PySR. Watch search progress by following the logs.", - inputs=[ - gr.File(label="Upload a CSV File"), - gr.Radio(list(test_equations.keys()), label="Test Equation"), - gr.Slider( - minimum=10, - maximum=1000, - value=100, - label="Number of Data Points", - step=1, - ), - gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level"), - gr.Slider( - minimum=1, - maximum=1000, - value=40, - label="Number of Iterations", - step=1, - ), - gr.Slider( - minimum=7, - maximum=35, - value=20, - label="Maximum Complexity", - step=1, - ), - gr.CheckboxGroup( - choices=["+", "-", "*", "/", "^"], - label="Binary Operators", - value=["+", "-", "*", "/"], - ), - gr.CheckboxGroup( - choices=[ - "sin", - "cos", - "exp", - "log", - "square", - "cube", - "sqrt", - "abs", - "tan", - ], - label="Unary Operators", - value=[], - ), - gr.Checkbox( - value=False, - label="Ignore Warnings", - ), - ], - outputs=[ - "dataframe", - gr.Textbox(label="Error Log"), - ], - ) - # Add file to the demo: + with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + with gr.Row(): + with gr.Tab("Example Data"): + # Plot of the example data: + example_plot = gr.ScatterPlot( + x="x", + y="y", + tooltip=["x", "y"], + x_lim=[0, 10], + y_lim=[-5, 5], + width=350, + height=300, + ) + test_equation = gr.Radio( + test_equations, + value=test_equations[0], + label="Test Equation" + ) + num_points = gr.Slider( + minimum=10, + maximum=1000, + value=100, + label="Number of Data Points", + step=1, + ) + noise_level = gr.Slider( + minimum=0, maximum=1, value=0.1, label="Noise Level" + ) + with gr.Tab("Upload Data"): + file_input = gr.File(label="Upload a CSV File") + with gr.Row(): + binary_operators = gr.CheckboxGroup( + choices=["+", "-", "*", "/", "^"], + label="Binary Operators", + value=["+", "-", "*", "/"], + ) + unary_operators = gr.CheckboxGroup( + choices=[ + "sin", + "cos", + "exp", + "log", + "square", + "cube", + "sqrt", + "abs", + "tan", + ], + label="Unary Operators", + value=[], + ) + niterations = gr.Slider( + minimum=1, + maximum=1000, + value=40, + label="Number of Iterations", + step=1, + ) + maxsize = gr.Slider( + minimum=7, + maximum=35, + value=20, + label="Maximum Complexity", + step=1, + ) + force_run = gr.Checkbox( + value=False, + label="Ignore Warnings", + ) + + with gr.Column(): + with gr.Row(): + df = gr.Dataframe( + headers=["Equation", "Loss", "Complexity"], + datatype=["str", "number", "number"], + ) + error_log = gr.Textbox(label="Error Log") + with gr.Row(): + run_button = gr.Button() + + run_button.click( + greet, + inputs=[ + file_input, + test_equation, + num_points, + noise_level, + niterations, + maxsize, + binary_operators, + unary_operators, + force_run, + ], + outputs=[df, error_log], + ) + + # Any update to the equation choice will trigger a replot: + for eqn_component in [test_equation, num_points, noise_level]: + eqn_component.change(replot, [test_equation, num_points, noise_level], example_plot) demo.launch() +def replot(test_equation, num_points, noise_level): + X, y = generate_data(test_equation, num_points, noise_level) + df = pd.DataFrame({"x": X["x"], "y": y}) + return df + if __name__ == "__main__": main() diff --git a/gui/requirements.txt b/gui/requirements.txt index 362c04afd..7d0b032c0 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,4 +1,4 @@ pysr==0.18.1 numpy pandas -gradio \ No newline at end of file +gradio From e1cf25c66d0cb9a0f85a81a3681210387c71fb4c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 17:38:06 +0000 Subject: [PATCH 076/136] Instructions in upload tab --- gui/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gui/app.py b/gui/app.py index 63e92c43f..7c884ce60 100644 --- a/gui/app.py +++ b/gui/app.py @@ -143,6 +143,7 @@ def main(): ) with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") + gr.Markdown("Upload a CSV file with the data to fit. The last column will be used as the target variable.") with gr.Row(): binary_operators = gr.CheckboxGroup( choices=["+", "-", "*", "/", "^"], From dd651362d0b209a5acb20aa8b42f39b3e824028c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 17:43:55 +0000 Subject: [PATCH 077/136] Refactor app --- gui/app.py | 197 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 114 insertions(+), 83 deletions(-) diff --git a/gui/app.py b/gui/app.py index 7c884ce60..41b3f946a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -110,112 +110,143 @@ def greet( return df, msg +def _data_layout(): + with gr.Tab("Example Data"): + # Plot of the example data: + example_plot = gr.ScatterPlot( + x="x", + y="y", + tooltip=["x", "y"], + x_lim=[0, 10], + y_lim=[-5, 5], + width=350, + height=300, + ) + test_equation = gr.Radio( + test_equations, value=test_equations[0], label="Test Equation" + ) + num_points = gr.Slider( + minimum=10, + maximum=1000, + value=100, + label="Number of Data Points", + step=1, + ) + noise_level = gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level") + with gr.Tab("Upload Data"): + file_input = gr.File(label="Upload a CSV File") + gr.Markdown( + "Upload a CSV file with the data to fit. The last column will be used as the target variable." + ) + + return dict( + file_input=file_input, + test_equation=test_equation, + num_points=num_points, + noise_level=noise_level, + example_plot=example_plot, + ) + + +def _settings_layout(): + binary_operators = gr.CheckboxGroup( + choices=["+", "-", "*", "/", "^"], + label="Binary Operators", + value=["+", "-", "*", "/"], + ) + unary_operators = gr.CheckboxGroup( + choices=[ + "sin", + "cos", + "exp", + "log", + "square", + "cube", + "sqrt", + "abs", + "tan", + ], + label="Unary Operators", + value=[], + ) + niterations = gr.Slider( + minimum=1, + maximum=1000, + value=40, + label="Number of Iterations", + step=1, + ) + maxsize = gr.Slider( + minimum=7, + maximum=35, + value=20, + label="Maximum Complexity", + step=1, + ) + force_run = gr.Checkbox( + value=False, + label="Ignore Warnings", + ) + return dict( + binary_operators=binary_operators, + unary_operators=unary_operators, + niterations=niterations, + maxsize=maxsize, + force_run=force_run, + ) + + def main(): + blocks = {} with gr.Blocks() as demo: with gr.Row(): with gr.Column(): with gr.Row(): - with gr.Tab("Example Data"): - # Plot of the example data: - example_plot = gr.ScatterPlot( - x="x", - y="y", - tooltip=["x", "y"], - x_lim=[0, 10], - y_lim=[-5, 5], - width=350, - height=300, - ) - test_equation = gr.Radio( - test_equations, - value=test_equations[0], - label="Test Equation" - ) - num_points = gr.Slider( - minimum=10, - maximum=1000, - value=100, - label="Number of Data Points", - step=1, - ) - noise_level = gr.Slider( - minimum=0, maximum=1, value=0.1, label="Noise Level" - ) - with gr.Tab("Upload Data"): - file_input = gr.File(label="Upload a CSV File") - gr.Markdown("Upload a CSV file with the data to fit. The last column will be used as the target variable.") + blocks = {**blocks, **_data_layout()} with gr.Row(): - binary_operators = gr.CheckboxGroup( - choices=["+", "-", "*", "/", "^"], - label="Binary Operators", - value=["+", "-", "*", "/"], - ) - unary_operators = gr.CheckboxGroup( - choices=[ - "sin", - "cos", - "exp", - "log", - "square", - "cube", - "sqrt", - "abs", - "tan", - ], - label="Unary Operators", - value=[], - ) - niterations = gr.Slider( - minimum=1, - maximum=1000, - value=40, - label="Number of Iterations", - step=1, - ) - maxsize = gr.Slider( - minimum=7, - maximum=35, - value=20, - label="Maximum Complexity", - step=1, - ) - force_run = gr.Checkbox( - value=False, - label="Ignore Warnings", - ) + blocks = {**blocks, **_settings_layout()} with gr.Column(): with gr.Row(): - df = gr.Dataframe( + blocks["df"] = gr.Dataframe( headers=["Equation", "Loss", "Complexity"], datatype=["str", "number", "number"], ) - error_log = gr.Textbox(label="Error Log") + blocks["error_log"] = gr.Textbox(label="Error Log") with gr.Row(): - run_button = gr.Button() + blocks["run"] = gr.Button() - run_button.click( + blocks["run"].click( greet, inputs=[ - file_input, - test_equation, - num_points, - noise_level, - niterations, - maxsize, - binary_operators, - unary_operators, - force_run, + blocks[k] + for k in [ + "file_input", + "test_equation", + "num_points", + "noise_level", + "niterations", + "maxsize", + "binary_operators", + "unary_operators", + "force_run", + ] ], - outputs=[df, error_log], + outputs=[blocks["df"], blocks["error_log"]], ) # Any update to the equation choice will trigger a replot: - for eqn_component in [test_equation, num_points, noise_level]: - eqn_component.change(replot, [test_equation, num_points, noise_level], example_plot) + eqn_components = [ + blocks["test_equation"], + blocks["num_points"], + blocks["noise_level"], + ] + for eqn_component in eqn_components: + eqn_component.change(replot, eqn_components, blocks["example_plot"]) demo.launch() + def replot(test_equation, num_points, noise_level): X, y = generate_data(test_equation, num_points, noise_level) df = pd.DataFrame({"x": X["x"], "y": y}) From c353ada35486e20aeb052a5cc683dd78d8a3ab1b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 17:45:08 +0000 Subject: [PATCH 078/136] Clean up output col --- gui/app.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/gui/app.py b/gui/app.py index 41b3f946a..8b9e4d47a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -207,14 +207,12 @@ def main(): blocks = {**blocks, **_settings_layout()} with gr.Column(): - with gr.Row(): - blocks["df"] = gr.Dataframe( - headers=["Equation", "Loss", "Complexity"], - datatype=["str", "number", "number"], - ) - blocks["error_log"] = gr.Textbox(label="Error Log") - with gr.Row(): - blocks["run"] = gr.Button() + blocks["df"] = gr.Dataframe( + headers=["Equation", "Loss", "Complexity"], + datatype=["str", "number", "number"], + ) + blocks["run"] = gr.Button() + blocks["error_log"] = gr.Textbox(label="Error Log") blocks["run"].click( greet, From 5a5a76f9776ba8bcc85175899474093ff0e664d4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 17:54:59 +0000 Subject: [PATCH 079/136] Turn off multithreading --- gui/Dockerfile | 1 + gui/app.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 8c25e6932..ff88cc768 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -48,4 +48,5 @@ ENV GRADIO_ALLOW_FLAGGING=never \ GRADIO_THEME=huggingface \ SYSTEM=spaces +ENV JULIA_NUM_THREADS=1 CMD ["/bin/bash", "-l", "-c", "/home/user/.venv/bin/python /home/user/app/app.py"] diff --git a/gui/app.py b/gui/app.py index 8b9e4d47a..3b4c31124 100644 --- a/gui/app.py +++ b/gui/app.py @@ -44,6 +44,7 @@ def greet( maxsize: int, binary_operators: list, unary_operators: list, + seed: int, force_run: bool, ): if file_obj is not None: @@ -86,6 +87,10 @@ def greet( binary_operators=binary_operators, unary_operators=unary_operators, timeout_in_seconds=1000, + multithreading=False, + procs=0, + deterministic=True, + random_state=seed, ) model.fit(X, y) @@ -183,6 +188,10 @@ def _settings_layout(): label="Maximum Complexity", step=1, ) + seed = gr.Number( + value=0, + label="Random Seed", + ) force_run = gr.Checkbox( value=False, label="Ignore Warnings", @@ -193,6 +202,7 @@ def _settings_layout(): niterations=niterations, maxsize=maxsize, force_run=force_run, + seed=seed, ) @@ -227,6 +237,7 @@ def main(): "maxsize", "binary_operators", "unary_operators", + "seed", "force_run", ] ], @@ -242,7 +253,7 @@ def main(): for eqn_component in eqn_components: eqn_component.change(replot, eqn_components, blocks["example_plot"]) - demo.launch() + demo.launch(debug=True) def replot(test_equation, num_points, noise_level): From bb76c1f4ec4f2ae8982bf253d962a67a6054ac5c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 19:33:25 +0000 Subject: [PATCH 080/136] Only run PySR in another process --- gui/app.py | 101 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/gui/app.py b/gui/app.py index 3b4c31124..126ac640b 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,7 +1,7 @@ import gradio as gr import numpy as np import pandas as pd -import pysr +import multiprocessing as mp import tempfile from typing import Optional @@ -35,26 +35,22 @@ def generate_data(s: str, num_points: int, noise_level: float): return pd.DataFrame({"x": x}), y_noisy -def greet( - file_obj: Optional[tempfile._TemporaryFileWrapper], - test_equation: str, - num_points: int, - noise_level: float, - niterations: int, - maxsize: int, - binary_operators: list, - unary_operators: list, - seed: int, - force_run: bool, +def _greet_dispatch( + file_input, + force_run, + test_equation, + num_points, + noise_level, + niterations, + maxsize, + binary_operators, + unary_operators, + seed, ): - if file_obj is not None: - if len(binary_operators) == 0 and len(unary_operators) == 0: - return ( - empty_df, - "Please select at least one operator!", - ) + """Load data, then spawn a process to run the greet function.""" + if file_input is not None: # Look at some statistics of the file: - df = pd.read_csv(file_obj) + df = pd.read_csv(file_input) if len(df) == 0: return ( empty_df, @@ -78,10 +74,44 @@ def greet( y = np.array(df[col_to_fit]) X = df.drop([col_to_fit], axis=1) else: + # X, y = generate_data(block["test_equation"], block["num_points"], block["noise_level"]) X, y = generate_data(test_equation, num_points, noise_level) + queue = mp.Queue() + process = mp.Process( + target=greet, + kwargs=dict( + X=X, + y=y, + queue=queue, + niterations=niterations, + maxsize=maxsize, + binary_operators=binary_operators, + unary_operators=unary_operators, + seed=seed, + ), + ) + process.start() + output = queue.get() + process.join() + return output + + +def greet( + *, + queue: mp.Queue, + X, + y, + niterations: int, + maxsize: int, + binary_operators: list, + unary_operators: list, + seed: int, +): + import pysr + model = pysr.PySRRegressor( - bumper=True, + progress=False, maxsize=maxsize, niterations=niterations, binary_operators=binary_operators, @@ -94,25 +124,11 @@ def greet( ) model.fit(X, y) - df = model.equations_[["equation", "loss", "complexity"]] + df = model.equations_[["complexity", "loss", "equation"]] # Convert all columns to string type: - df = df.astype(str) - msg = ( - "Success!\n" - f"You may run the model locally (faster) with " - f"the following parameters:" - + f""" -model = PySRRegressor( - niterations={niterations}, - binary_operators={str(binary_operators)}, - unary_operators={str(unary_operators)}, - maxsize={maxsize}, -) -model.fit(X, y)""" - ) + queue.put(df) - df.to_csv("pysr_output.csv", index=False) - return df, msg + return 0 def _data_layout(): @@ -218,18 +234,18 @@ def main(): with gr.Column(): blocks["df"] = gr.Dataframe( - headers=["Equation", "Loss", "Complexity"], - datatype=["str", "number", "number"], + headers=["complexity", "loss", "equation"], + datatype=["number", "number", "str"], ) blocks["run"] = gr.Button() - blocks["error_log"] = gr.Textbox(label="Error Log") blocks["run"].click( - greet, + _greet_dispatch, inputs=[ blocks[k] for k in [ "file_input", + "force_run", "test_equation", "num_points", "noise_level", @@ -238,10 +254,9 @@ def main(): "binary_operators", "unary_operators", "seed", - "force_run", ] ], - outputs=[blocks["df"], blocks["error_log"]], + outputs=[blocks["df"]], ) # Any update to the equation choice will trigger a replot: From fea9443ce60cf447e45c42b09a573f9d41ade0ea Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 21:16:49 +0000 Subject: [PATCH 081/136] Plotting of pareto front --- gui/app.py | 126 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 94 insertions(+), 32 deletions(-) diff --git a/gui/app.py b/gui/app.py index 126ac640b..380d540b3 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,9 +1,14 @@ import gradio as gr import numpy as np +import os import pandas as pd +import time import multiprocessing as mp +from matplotlib import pyplot as plt +plt.ioff() import tempfile -from typing import Optional +from typing import Optional, Union +from pathlib import Path empty_df = pd.DataFrame( { @@ -18,7 +23,7 @@ ] -def generate_data(s: str, num_points: int, noise_level: float): +def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): x = np.linspace(0, 10, num_points) for (k, v) in { "sin": "np.sin", @@ -30,7 +35,8 @@ def generate_data(s: str, num_points: int, noise_level: float): }.items(): s = s.replace(k, v) y = eval(s) - noise = np.random.normal(0, noise_level, y.shape) + rstate = np.random.RandomState(data_seed) + noise = rstate.normal(0, noise_level, y.shape) y_noisy = y + noise return pd.DataFrame({"x": x}), y_noisy @@ -41,6 +47,7 @@ def _greet_dispatch( test_equation, num_points, noise_level, + data_seed, niterations, maxsize, binary_operators, @@ -74,32 +81,56 @@ def _greet_dispatch( y = np.array(df[col_to_fit]) X = df.drop([col_to_fit], axis=1) else: - # X, y = generate_data(block["test_equation"], block["num_points"], block["noise_level"]) - X, y = generate_data(test_equation, num_points, noise_level) - - queue = mp.Queue() - process = mp.Process( - target=greet, - kwargs=dict( - X=X, - y=y, - queue=queue, - niterations=niterations, - maxsize=maxsize, - binary_operators=binary_operators, - unary_operators=unary_operators, - seed=seed, - ), - ) - process.start() - output = queue.get() - process.join() - return output + X, y = generate_data(test_equation, num_points, noise_level, data_seed) + + with tempfile.TemporaryDirectory() as tmpdirname: + base = Path(tmpdirname) + equation_file = base / "hall_of_fame.csv" + equation_file_bkup = base / "hall_of_fame.csv.bkup" + process = mp.Process( + target=greet, + kwargs=dict( + X=X, + y=y, + niterations=niterations, + maxsize=maxsize, + binary_operators=binary_operators, + unary_operators=unary_operators, + seed=seed, + equation_file=equation_file, + ), + ) + process.start() + while process.is_alive(): + if equation_file_bkup.exists(): + try: + # First, copy the file to a the copy file + equation_file_copy = base / "hall_of_fame_copy.csv" + os.system(f"cp {equation_file_bkup} {equation_file_copy}") + df = pd.read_csv(equation_file_copy) + # Ensure it is pareto dominated, with more complex expressions + # having higher loss. Otherwise remove those rows. + # TODO: Not sure why this occurs; could be the result of a late copy? + df.sort_values("Complexity", ascending=True, inplace=True) + df.reset_index(inplace=True) + bad_idx = [] + min_loss = None + for i in df.index: + if min_loss is None or df.loc[i, "Loss"] < min_loss: + min_loss = float(df.loc[i, "Loss"]) + else: + bad_idx.append(i) + df.drop(index=bad_idx, inplace=True) + yield df[["Complexity", "Loss", "Equation"]] + except pd.errors.EmptyDataError: + pass + time.sleep(1) + + process.join() def greet( *, - queue: mp.Queue, X, y, niterations: int, @@ -107,6 +138,7 @@ def greet( binary_operators: list, unary_operators: list, seed: int, + equation_file: Union[str, Path], ): import pysr @@ -121,13 +153,10 @@ def greet( procs=0, deterministic=True, random_state=seed, + equation_file=equation_file, ) model.fit(X, y) - df = model.equations_[["complexity", "loss", "equation"]] - # Convert all columns to string type: - queue.put(df) - return 0 @@ -154,6 +183,7 @@ def _data_layout(): step=1, ) noise_level = gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level") + data_seed = gr.Number(value=0, label="Random Seed") with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") gr.Markdown( @@ -165,6 +195,7 @@ def _data_layout(): test_equation=test_equation, num_points=num_points, noise_level=noise_level, + data_seed=data_seed, example_plot=example_plot, ) @@ -233,6 +264,7 @@ def main(): blocks = {**blocks, **_settings_layout()} with gr.Column(): + blocks["pareto"] = gr.Plot() blocks["df"] = gr.Dataframe( headers=["complexity", "loss", "equation"], datatype=["number", "number", "str"], @@ -249,6 +281,7 @@ def main(): "test_equation", "num_points", "noise_level", + "data_seed", "niterations", "maxsize", "binary_operators", @@ -256,7 +289,7 @@ def main(): "seed", ] ], - outputs=[blocks["df"]], + outputs=blocks["df"], ) # Any update to the equation choice will trigger a replot: @@ -264,18 +297,47 @@ def main(): blocks["test_equation"], blocks["num_points"], blocks["noise_level"], + blocks["data_seed"], ] for eqn_component in eqn_components: eqn_component.change(replot, eqn_components, blocks["example_plot"]) + # Update plot when dataframe is updated: + blocks["df"].change( + replot_pareto, + inputs=[blocks["df"], blocks["maxsize"]], + outputs=[blocks["pareto"]], + ) + demo.launch(debug=True) -def replot(test_equation, num_points, noise_level): - X, y = generate_data(test_equation, num_points, noise_level) +def replot(test_equation, num_points, noise_level, data_seed): + X, y = generate_data(test_equation, num_points, noise_level, data_seed) df = pd.DataFrame({"x": X["x"], "y": y}) return df +def replot_pareto(df, maxsize): + # Matplotlib log-log plot of loss vs complexity: + fig, ax = plt.subplots(figsize=(5, 5)) + + ax.set_xlabel('Complexity', fontsize=14) + ax.set_ylabel('Loss', fontsize=14) + if len(df) == 0 or 'Equation' not in df.columns: + return fig + + ax.loglog(df['Complexity'], df['Loss'], marker='o', linestyle='-', color='b') + ax.set_xlim(1, maxsize + 1) + # Set ylim to next power of 2: + ytop = 2 ** (np.ceil(np.log2(df['Loss'].max()))) + ybottom = 2 ** (np.floor(np.log2(df['Loss'].min() + 1e-20))) + ax.set_ylim(ybottom, ytop) + ax.grid(True, which="both", ls="--", linewidth=0.5) + fig.tight_layout() + ax.tick_params(axis='both', which='major', labelsize=12) + ax.tick_params(axis='both', which='minor', labelsize=10) + + return fig if __name__ == "__main__": main() From 4eac4912beb0b101ec51070f8687b0d9a267e332 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 21:36:01 +0000 Subject: [PATCH 082/136] Improve formatting of plot --- gui/Dockerfile | 7 +++++++ gui/app.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/gui/Dockerfile b/gui/Dockerfile index ff88cc768..44676d14a 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -11,10 +11,17 @@ RUN apt-get update && \ libgl1-mesa-glx \ libglib2.0-0 \ libpython3-dev \ + libfreetype6-dev \ + pkg-config \ + libfontconfig1 \ + fontconfig \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +COPY fonts/*.ttf /usr/local/share/fonts/ +RUN fc-cache -f -v + WORKDIR /code COPY ./requirements.txt /code/requirements.txt diff --git a/gui/app.py b/gui/app.py index 380d540b3..37a9af39a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -268,6 +268,8 @@ def main(): blocks["df"] = gr.Dataframe( headers=["complexity", "loss", "equation"], datatype=["number", "number", "str"], + wrap=True, + column_widths=[100, 100, 300], ) blocks["run"] = gr.Button() @@ -339,5 +341,39 @@ def replot_pareto(df, maxsize): return fig +def replot_pareto(df, maxsize): + plt.rcParams['font.family'] = 'IBM Plex Mono' + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + + if len(df) == 0 or 'Equation' not in df.columns: + return fig + + # Plotting the data + ax.loglog(df['Complexity'], df['Loss'], marker='o', linestyle='-', color='#333f48', linewidth=1.5, markersize=6) + + # Set the axis limits + ax.set_xlim(0.5, maxsize + 1) + ytop = 2 ** (np.ceil(np.log2(df['Loss'].max()))) + ybottom = 2 ** (np.floor(np.log2(df['Loss'].min() + 1e-20))) + ax.set_ylim(ybottom, ytop) + + ax.grid(True, which="both", ls="--", linewidth=0.5, color='gray', alpha=0.5) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + + # Range-frame the plot + for direction in ['bottom', 'left']: + ax.spines[direction].set_position(('outward', 10)) + + # Delete far ticks + ax.tick_params(axis='both', which='major', labelsize=10, direction='out', length=5) + ax.tick_params(axis='both', which='minor', labelsize=8, direction='out', length=3) + + ax.set_xlabel('Complexity') + ax.set_ylabel('Loss') + fig.tight_layout(pad=2) + + return fig + if __name__ == "__main__": main() From 758e952d2de1cb7ff20091d17c59b42b55fec354 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 21:59:40 +0000 Subject: [PATCH 083/136] Make nicer plot for example data --- gui/app.py | 103 ++++++++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/gui/app.py b/gui/app.py index 37a9af39a..92334e244 100644 --- a/gui/app.py +++ b/gui/app.py @@ -19,12 +19,13 @@ ) test_equations = [ - "sin(x) + cos(2*x) + tan(x/3)", + "sin(2*x)/x + 0.1*x" ] def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): - x = np.linspace(0, 10, num_points) + rstate = np.random.RandomState(data_seed) + x = rstate.uniform(-10, 10, num_points) for (k, v) in { "sin": "np.sin", "cos": "np.cos", @@ -35,7 +36,6 @@ def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): }.items(): s = s.replace(k, v) y = eval(s) - rstate = np.random.RandomState(data_seed) noise = rstate.normal(0, noise_level, y.shape) y_noisy = y + noise return pd.DataFrame({"x": x}), y_noisy @@ -101,30 +101,37 @@ def _greet_dispatch( ), ) process.start() + last_yield_time = None while process.is_alive(): if equation_file_bkup.exists(): try: # First, copy the file to a the copy file equation_file_copy = base / "hall_of_fame_copy.csv" os.system(f"cp {equation_file_bkup} {equation_file_copy}") - df = pd.read_csv(equation_file_copy) + equations = pd.read_csv(equation_file_copy) # Ensure it is pareto dominated, with more complex expressions # having higher loss. Otherwise remove those rows. # TODO: Not sure why this occurs; could be the result of a late copy? - df.sort_values("Complexity", ascending=True, inplace=True) - df.reset_index(inplace=True) + equations.sort_values("Complexity", ascending=True, inplace=True) + equations.reset_index(inplace=True) bad_idx = [] min_loss = None - for i in df.index: - if min_loss is None or df.loc[i, "Loss"] < min_loss: - min_loss = float(df.loc[i, "Loss"]) + for i in equations.index: + if min_loss is None or equations.loc[i, "Loss"] < min_loss: + min_loss = float(equations.loc[i, "Loss"]) else: bad_idx.append(i) - df.drop(index=bad_idx, inplace=True) - yield df[["Complexity", "Loss", "Equation"]] + equations.drop(index=bad_idx, inplace=True) + + while last_yield_time is not None and time.time() - last_yield_time < 1: + time.sleep(0.1) + + yield equations[["Complexity", "Loss", "Equation"]] + + last_yield_time = time.time() except pd.errors.EmptyDataError: pass - time.sleep(1) + process.join() @@ -163,31 +170,23 @@ def greet( def _data_layout(): with gr.Tab("Example Data"): # Plot of the example data: - example_plot = gr.ScatterPlot( - x="x", - y="y", - tooltip=["x", "y"], - x_lim=[0, 10], - y_lim=[-5, 5], - width=350, - height=300, - ) + example_plot = gr.Plot() test_equation = gr.Radio( test_equations, value=test_equations[0], label="Test Equation" ) num_points = gr.Slider( minimum=10, maximum=1000, - value=100, + value=200, label="Number of Data Points", step=1, ) - noise_level = gr.Slider(minimum=0, maximum=1, value=0.1, label="Noise Level") + noise_level = gr.Slider(minimum=0, maximum=1, value=0.05, label="Noise Level") data_seed = gr.Number(value=0, label="Random Seed") with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") gr.Markdown( - "Upload a CSV file with the data to fit. The last column will be used as the target variable." + "The rightmost column of your CSV file be used as the target variable." ) return dict( @@ -219,7 +218,7 @@ def _settings_layout(): "tan", ], label="Unary Operators", - value=[], + value=["sin"], ) niterations = gr.Slider( minimum=1, @@ -304,43 +303,17 @@ def main(): for eqn_component in eqn_components: eqn_component.change(replot, eqn_components, blocks["example_plot"]) + # Update plot when dataframe is updated: blocks["df"].change( replot_pareto, inputs=[blocks["df"], blocks["maxsize"]], outputs=[blocks["pareto"]], ) + demo.load(replot, eqn_components, blocks["example_plot"]) demo.launch(debug=True) - -def replot(test_equation, num_points, noise_level, data_seed): - X, y = generate_data(test_equation, num_points, noise_level, data_seed) - df = pd.DataFrame({"x": X["x"], "y": y}) - return df - -def replot_pareto(df, maxsize): - # Matplotlib log-log plot of loss vs complexity: - fig, ax = plt.subplots(figsize=(5, 5)) - - ax.set_xlabel('Complexity', fontsize=14) - ax.set_ylabel('Loss', fontsize=14) - if len(df) == 0 or 'Equation' not in df.columns: - return fig - - ax.loglog(df['Complexity'], df['Loss'], marker='o', linestyle='-', color='b') - ax.set_xlim(1, maxsize + 1) - # Set ylim to next power of 2: - ytop = 2 ** (np.ceil(np.log2(df['Loss'].max()))) - ybottom = 2 ** (np.floor(np.log2(df['Loss'].min() + 1e-20))) - ax.set_ylim(ybottom, ytop) - ax.grid(True, which="both", ls="--", linewidth=0.5) - fig.tight_layout() - ax.tick_params(axis='both', which='major', labelsize=12) - ax.tick_params(axis='both', which='minor', labelsize=10) - - return fig - def replot_pareto(df, maxsize): plt.rcParams['font.family'] = 'IBM Plex Mono' fig, ax = plt.subplots(figsize=(6, 6), dpi=100) @@ -375,5 +348,29 @@ def replot_pareto(df, maxsize): return fig +def replot(test_equation, num_points, noise_level, data_seed): + X, y = generate_data(test_equation, num_points, noise_level, data_seed) + x = X["x"] + + plt.rcParams['font.family'] = 'IBM Plex Mono' + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + + ax.scatter(x, y, alpha=0.7, edgecolors='w', s=50) + + ax.grid(True, which="major", linestyle='--', linewidth=0.5, color='gray', alpha=0.7) + ax.grid(True, which="minor", linestyle=':', linewidth=0.5, color='gray', alpha=0.5) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_color('gray') + ax.spines['left'].set_color('gray') + ax.tick_params(axis='both', which='major', labelsize=12, direction='out', length=6) + ax.tick_params(axis='both', which='minor', labelsize=10, direction='out', length=4) + ax.set_xlabel("x") + ax.set_ylabel("y") + + fig.tight_layout() + return fig + + if __name__ == "__main__": main() From 2681c825156e8a723201b330b2b6c3cbb69df78e Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:00:55 +0000 Subject: [PATCH 084/136] Turn multithreading back on --- gui/Dockerfile | 1 - gui/app.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 44676d14a..4a527a0f6 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -55,5 +55,4 @@ ENV GRADIO_ALLOW_FLAGGING=never \ GRADIO_THEME=huggingface \ SYSTEM=spaces -ENV JULIA_NUM_THREADS=1 CMD ["/bin/bash", "-l", "-c", "/home/user/.venv/bin/python /home/user/app/app.py"] diff --git a/gui/app.py b/gui/app.py index 92334e244..946a75a6f 100644 --- a/gui/app.py +++ b/gui/app.py @@ -156,10 +156,6 @@ def greet( binary_operators=binary_operators, unary_operators=unary_operators, timeout_in_seconds=1000, - multithreading=False, - procs=0, - deterministic=True, - random_state=seed, equation_file=equation_file, ) model.fit(X, y) From f751163555db21b9380805fb901ca11b73762bf9 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:09:21 +0000 Subject: [PATCH 085/136] Fix formatting of data gen tab --- gui/app.py | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/gui/app.py b/gui/app.py index 946a75a6f..866a89094 100644 --- a/gui/app.py +++ b/gui/app.py @@ -166,19 +166,22 @@ def greet( def _data_layout(): with gr.Tab("Example Data"): # Plot of the example data: - example_plot = gr.Plot() - test_equation = gr.Radio( - test_equations, value=test_equations[0], label="Test Equation" - ) - num_points = gr.Slider( - minimum=10, - maximum=1000, - value=200, - label="Number of Data Points", - step=1, - ) - noise_level = gr.Slider(minimum=0, maximum=1, value=0.05, label="Noise Level") - data_seed = gr.Number(value=0, label="Random Seed") + with gr.Row(): + with gr.Column(): + example_plot = gr.Plot() + with gr.Column(): + test_equation = gr.Radio( + test_equations, value=test_equations[0], label="Test Equation" + ) + num_points = gr.Slider( + minimum=10, + maximum=1000, + value=200, + label="Number of Data Points", + step=1, + ) + noise_level = gr.Slider(minimum=0, maximum=1, value=0.05, label="Noise Level") + data_seed = gr.Number(value=0, label="Random Seed") with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") gr.Markdown( @@ -353,18 +356,22 @@ def replot(test_equation, num_points, noise_level, data_seed): ax.scatter(x, y, alpha=0.7, edgecolors='w', s=50) - ax.grid(True, which="major", linestyle='--', linewidth=0.5, color='gray', alpha=0.7) - ax.grid(True, which="minor", linestyle=':', linewidth=0.5, color='gray', alpha=0.5) + ax.grid(True, which="both", ls="--", linewidth=0.5, color='gray', alpha=0.5) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) - ax.spines['bottom'].set_color('gray') - ax.spines['left'].set_color('gray') - ax.tick_params(axis='both', which='major', labelsize=12, direction='out', length=6) - ax.tick_params(axis='both', which='minor', labelsize=10, direction='out', length=4) + + # Range-frame the plot + for direction in ['bottom', 'left']: + ax.spines[direction].set_position(('outward', 10)) + + # Delete far ticks + ax.tick_params(axis='both', which='major', labelsize=10, direction='out', length=5) + ax.tick_params(axis='both', which='minor', labelsize=8, direction='out', length=3) + ax.set_xlabel("x") ax.set_ylabel("y") + fig.tight_layout(pad=2) - fig.tight_layout() return fig From 9d6017e45e13df73cd64041144a64e91f4632c5f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:17:07 +0000 Subject: [PATCH 086/136] Add setting for plot update rate --- gui/app.py | 168 +++++++++++++++++++++++++++++------------------------ 1 file changed, 91 insertions(+), 77 deletions(-) diff --git a/gui/app.py b/gui/app.py index 866a89094..5eb98f0ec 100644 --- a/gui/app.py +++ b/gui/app.py @@ -5,6 +5,7 @@ import time import multiprocessing as mp from matplotlib import pyplot as plt + plt.ioff() import tempfile from typing import Optional, Union @@ -18,9 +19,7 @@ } ) -test_equations = [ - "sin(2*x)/x + 0.1*x" -] +test_equations = ["sin(2*x)/x + 0.1*x"] def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): @@ -52,7 +51,7 @@ def _greet_dispatch( maxsize, binary_operators, unary_operators, - seed, + plot_update_delay, ): """Load data, then spawn a process to run the greet function.""" if file_input is not None: @@ -96,7 +95,6 @@ def _greet_dispatch( maxsize=maxsize, binary_operators=binary_operators, unary_operators=unary_operators, - seed=seed, equation_file=equation_file, ), ) @@ -123,7 +121,10 @@ def _greet_dispatch( bad_idx.append(i) equations.drop(index=bad_idx, inplace=True) - while last_yield_time is not None and time.time() - last_yield_time < 1: + while ( + last_yield_time is not None + and time.time() - last_yield_time < plot_update_delay + ): time.sleep(0.1) yield equations[["Complexity", "Loss", "Equation"]] @@ -132,7 +133,6 @@ def _greet_dispatch( except pd.errors.EmptyDataError: pass - process.join() @@ -144,7 +144,6 @@ def greet( maxsize: int, binary_operators: list, unary_operators: list, - seed: int, equation_file: Union[str, Path], ): import pysr @@ -180,7 +179,9 @@ def _data_layout(): label="Number of Data Points", step=1, ) - noise_level = gr.Slider(minimum=0, maximum=1, value=0.05, label="Noise Level") + noise_level = gr.Slider( + minimum=0, maximum=1, value=0.05, label="Noise Level" + ) data_seed = gr.Number(value=0, label="Random Seed") with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") @@ -199,55 +200,59 @@ def _data_layout(): def _settings_layout(): - binary_operators = gr.CheckboxGroup( - choices=["+", "-", "*", "/", "^"], - label="Binary Operators", - value=["+", "-", "*", "/"], - ) - unary_operators = gr.CheckboxGroup( - choices=[ - "sin", - "cos", - "exp", - "log", - "square", - "cube", - "sqrt", - "abs", - "tan", - ], - label="Unary Operators", - value=["sin"], - ) - niterations = gr.Slider( - minimum=1, - maximum=1000, - value=40, - label="Number of Iterations", - step=1, - ) - maxsize = gr.Slider( - minimum=7, - maximum=35, - value=20, - label="Maximum Complexity", - step=1, - ) - seed = gr.Number( - value=0, - label="Random Seed", - ) - force_run = gr.Checkbox( - value=False, - label="Ignore Warnings", - ) + with gr.Tab("Basic Settings"): + binary_operators = gr.CheckboxGroup( + choices=["+", "-", "*", "/", "^"], + label="Binary Operators", + value=["+", "-", "*", "/"], + ) + unary_operators = gr.CheckboxGroup( + choices=[ + "sin", + "cos", + "exp", + "log", + "square", + "cube", + "sqrt", + "abs", + "tan", + ], + label="Unary Operators", + value=["sin"], + ) + niterations = gr.Slider( + minimum=1, + maximum=1000, + value=40, + label="Number of Iterations", + step=1, + ) + maxsize = gr.Slider( + minimum=7, + maximum=35, + value=20, + label="Maximum Complexity", + step=1, + ) + force_run = gr.Checkbox( + value=False, + label="Ignore Warnings", + ) + with gr.Tab("Gradio Settings"): + plot_update_delay = gr.Slider( + minimum=1, + maximum=100, + value=3, + label="Plot Update Delay", + ) return dict( binary_operators=binary_operators, unary_operators=unary_operators, niterations=niterations, maxsize=maxsize, force_run=force_run, - seed=seed, + plot_update_delay=plot_update_delay, ) @@ -286,7 +291,7 @@ def main(): "maxsize", "binary_operators", "unary_operators", - "seed", + "plot_update_delay", ] ], outputs=blocks["df"], @@ -302,7 +307,6 @@ def main(): for eqn_component in eqn_components: eqn_component.change(replot, eqn_components, blocks["example_plot"]) - # Update plot when dataframe is updated: blocks["df"].change( replot_pareto, @@ -313,60 +317,70 @@ def main(): demo.launch(debug=True) + def replot_pareto(df, maxsize): - plt.rcParams['font.family'] = 'IBM Plex Mono' + plt.rcParams["font.family"] = "IBM Plex Mono" fig, ax = plt.subplots(figsize=(6, 6), dpi=100) - if len(df) == 0 or 'Equation' not in df.columns: + if len(df) == 0 or "Equation" not in df.columns: return fig # Plotting the data - ax.loglog(df['Complexity'], df['Loss'], marker='o', linestyle='-', color='#333f48', linewidth=1.5, markersize=6) + ax.loglog( + df["Complexity"], + df["Loss"], + marker="o", + linestyle="-", + color="#333f48", + linewidth=1.5, + markersize=6, + ) # Set the axis limits ax.set_xlim(0.5, maxsize + 1) - ytop = 2 ** (np.ceil(np.log2(df['Loss'].max()))) - ybottom = 2 ** (np.floor(np.log2(df['Loss'].min() + 1e-20))) + ytop = 2 ** (np.ceil(np.log2(df["Loss"].max()))) + ybottom = 2 ** (np.floor(np.log2(df["Loss"].min() + 1e-20))) ax.set_ylim(ybottom, ytop) - ax.grid(True, which="both", ls="--", linewidth=0.5, color='gray', alpha=0.5) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) + ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) # Range-frame the plot - for direction in ['bottom', 'left']: - ax.spines[direction].set_position(('outward', 10)) + for direction in ["bottom", "left"]: + ax.spines[direction].set_position(("outward", 10)) # Delete far ticks - ax.tick_params(axis='both', which='major', labelsize=10, direction='out', length=5) - ax.tick_params(axis='both', which='minor', labelsize=8, direction='out', length=3) + ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) + ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) - ax.set_xlabel('Complexity') - ax.set_ylabel('Loss') + ax.set_xlabel("Complexity") + ax.set_ylabel("Loss") fig.tight_layout(pad=2) return fig + def replot(test_equation, num_points, noise_level, data_seed): X, y = generate_data(test_equation, num_points, noise_level, data_seed) x = X["x"] - plt.rcParams['font.family'] = 'IBM Plex Mono' + plt.rcParams["font.family"] = "IBM Plex Mono" fig, ax = plt.subplots(figsize=(6, 6), dpi=100) - ax.scatter(x, y, alpha=0.7, edgecolors='w', s=50) + ax.scatter(x, y, alpha=0.7, edgecolors="w", s=50) - ax.grid(True, which="both", ls="--", linewidth=0.5, color='gray', alpha=0.5) - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) + ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) # Range-frame the plot - for direction in ['bottom', 'left']: - ax.spines[direction].set_position(('outward', 10)) + for direction in ["bottom", "left"]: + ax.spines[direction].set_position(("outward", 10)) # Delete far ticks - ax.tick_params(axis='both', which='major', labelsize=10, direction='out', length=5) - ax.tick_params(axis='both', which='minor', labelsize=8, direction='out', length=3) + ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) + ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) ax.set_xlabel("x") ax.set_ylabel("y") From 8a2bd535f117804a9bdaa1ea0756698c914cfcc3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:37:30 +0000 Subject: [PATCH 087/136] Add more advanced settings --- gui/app.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 13 deletions(-) diff --git a/gui/app.py b/gui/app.py index 5eb98f0ec..e06ec693a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -52,6 +52,16 @@ def _greet_dispatch( binary_operators, unary_operators, plot_update_delay, + parsimony, + populations, + population_size, + ncycles_per_iteration, + elementwise_loss, + adaptive_parsimony_scaling, + optimizer_algorithm, + optimizer_iterations, + batching, + batch_size, ): """Load data, then spawn a process to run the greet function.""" if file_input is not None: @@ -96,6 +106,16 @@ def _greet_dispatch( binary_operators=binary_operators, unary_operators=unary_operators, equation_file=equation_file, + parsimony=parsimony, + populations=populations, + population_size=population_size, + ncycles_per_iteration=ncycles_per_iteration, + elementwise_loss=elementwise_loss, + adaptive_parsimony_scaling=adaptive_parsimony_scaling, + optimizer_algorithm=optimizer_algorithm, + optimizer_iterations=optimizer_iterations, + batching=batching, + batch_size=batch_size, ), ) process.start() @@ -140,22 +160,14 @@ def greet( *, X, y, - niterations: int, - maxsize: int, - binary_operators: list, - unary_operators: list, - equation_file: Union[str, Path], + **pysr_kwargs, ): import pysr model = pysr.PySRRegressor( progress=False, - maxsize=maxsize, - niterations=niterations, - binary_operators=binary_operators, - unary_operators=unary_operators, timeout_in_seconds=1000, - equation_file=equation_file, + **pysr_kwargs, ) model.fit(X, y) @@ -230,15 +242,68 @@ def _settings_layout(): ) maxsize = gr.Slider( minimum=7, - maximum=35, + maximum=100, value=20, label="Maximum Complexity", step=1, ) - force_run = gr.Checkbox( + parsimony = gr.Number( + value=0.0032, + label="Parsimony Coefficient", + ) + with gr.Tab("Advanced Settings"): + populations = gr.Slider( + minimum=2, + maximum=100, + value=15, + label="Number of Populations", + step=1, + ) + population_size = gr.Slider( + minimum=2, + maximum=1000, + value=33, + label="Population Size", + step=1, + ) + ncycles_per_iteration = gr.Number( + value=550, + label="Cycles per Iteration", + ) + elementwise_loss = gr.Radio( + ["L2DistLoss()", "L1DistLoss()", "LogitDistLoss()", "HuberLoss()"], + value="L2DistLoss()", + label="Loss Function", + ) + adaptive_parsimony_scaling = gr.Number( + value=20.0, + label="Adaptive Parsimony Scaling", + ) + optimizer_algorithm = gr.Radio( + ["BFGS", "NelderMead"], + value="BFGS", + label="Optimizer Algorithm", + ) + optimizer_iterations = gr.Slider( + minimum=1, + maximum=100, + value=8, + label="Optimizer Iterations", + step=1, + ) + # Bool: + batching = gr.Checkbox( value=False, - label="Ignore Warnings", + label="Batching", ) + batch_size = gr.Slider( + minimum=2, + maximum=1000, + value=50, + label="Batch Size", + step=1, + ) + with gr.Tab("Gradio Settings"): plot_update_delay = gr.Slider( minimum=1, @@ -246,6 +311,10 @@ def _settings_layout(): value=3, label="Plot Update Delay", ) + force_run = gr.Checkbox( + value=False, + label="Ignore Warnings", + ) return dict( binary_operators=binary_operators, unary_operators=unary_operators, @@ -253,6 +322,16 @@ def _settings_layout(): maxsize=maxsize, force_run=force_run, plot_update_delay=plot_update_delay, + parsimony=parsimony, + populations=populations, + population_size=population_size, + ncycles_per_iteration=ncycles_per_iteration, + elementwise_loss=elementwise_loss, + adaptive_parsimony_scaling=adaptive_parsimony_scaling, + optimizer_algorithm=optimizer_algorithm, + optimizer_iterations=optimizer_iterations, + batching=batching, + batch_size=batch_size, ) @@ -292,6 +371,16 @@ def main(): "binary_operators", "unary_operators", "plot_update_delay", + "parsimony", + "populations", + "population_size", + "ncycles_per_iteration", + "elementwise_loss", + "adaptive_parsimony_scaling", + "optimizer_algorithm", + "optimizer_iterations", + "batching", + "batch_size", ] ], outputs=blocks["df"], From e487754025bb1a7da5053198ff5939cb74dbe481 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:38:01 +0000 Subject: [PATCH 088/136] Typo --- gui/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index e06ec693a..83eb8fd4f 100644 --- a/gui/app.py +++ b/gui/app.py @@ -198,7 +198,7 @@ def _data_layout(): with gr.Tab("Upload Data"): file_input = gr.File(label="Upload a CSV File") gr.Markdown( - "The rightmost column of your CSV file be used as the target variable." + "The rightmost column of your CSV file will be used as the target variable." ) return dict( From 0dc382d911fe2feebd95a2326ea310736768e5ab Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:41:47 +0000 Subject: [PATCH 089/136] More operators --- gui/app.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index 83eb8fd4f..9bb559508 100644 --- a/gui/app.py +++ b/gui/app.py @@ -214,7 +214,7 @@ def _data_layout(): def _settings_layout(): with gr.Tab("Basic Settings"): binary_operators = gr.CheckboxGroup( - choices=["+", "-", "*", "/", "^"], + choices=["+", "-", "*", "/", "^", "max", "min", "mod", "cond"], label="Binary Operators", value=["+", "-", "*", "/"], ) @@ -229,6 +229,18 @@ def _settings_layout(): "sqrt", "abs", "tan", + "sinh", + "cosh", + "tanh", + "atan", + "asinh", + "acosh", + "erf", + "relu", + "round", + "floor", + "ceil", + "sign", ], label="Unary Operators", value=["sin"], From d3c4f72230bf7aa0b164eda372f022dd3fd46023 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 22:45:17 +0000 Subject: [PATCH 090/136] Remove huge number of operators --- gui/app.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/gui/app.py b/gui/app.py index 9bb559508..55b0c56e6 100644 --- a/gui/app.py +++ b/gui/app.py @@ -222,24 +222,16 @@ def _settings_layout(): choices=[ "sin", "cos", + "tan", "exp", "log", "square", "cube", "sqrt", "abs", - "tan", - "sinh", - "cosh", - "tanh", - "atan", - "asinh", - "acosh", "erf", "relu", "round", - "floor", - "ceil", "sign", ], label="Unary Operators", From 63ae9cd12084b57c8ef1bebb9a4f0c3eede9ca73 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 28 Mar 2024 23:13:02 +0000 Subject: [PATCH 091/136] Add pre-commit config --- gui/.pre-commit-config.yaml | 33 +++++++++++++++++++++++++++++++++ gui/app.py | 10 +++++----- gui/gen_example_data.py | 2 +- 3 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 gui/.pre-commit-config.yaml diff --git a/gui/.pre-commit-config.yaml b/gui/.pre-commit-config.yaml new file mode 100644 index 000000000..95a9578f2 --- /dev/null +++ b/gui/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +repos: + # General linting + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + # General formatting + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + - id: black-jupyter + exclude: pysr/test/test_nb.ipynb + # Stripping notebooks + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + exclude: pysr/test/test_nb.ipynb + # Unused imports + - repo: https://github.com/hadialqattan/pycln + rev: "v2.4.0" + hooks: + - id: pycln + # Sorted imports + - repo: https://github.com/PyCQA/isort + rev: "5.13.2" + hooks: + - id: isort + additional_dependencies: [toml] diff --git a/gui/app.py b/gui/app.py index 55b0c56e6..95220f253 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,14 +1,14 @@ +import multiprocessing as mp +import os +import time + import gradio as gr import numpy as np -import os import pandas as pd -import time -import multiprocessing as mp from matplotlib import pyplot as plt plt.ioff() import tempfile -from typing import Optional, Union from pathlib import Path empty_df = pd.DataFrame( @@ -25,7 +25,7 @@ def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): rstate = np.random.RandomState(data_seed) x = rstate.uniform(-10, 10, num_points) - for (k, v) in { + for k, v in { "sin": "np.sin", "cos": "np.cos", "exp": "np.exp", diff --git a/gui/gen_example_data.py b/gui/gen_example_data.py index 3b992c875..744b493d3 100644 --- a/gui/gen_example_data.py +++ b/gui/gen_example_data.py @@ -1,5 +1,5 @@ -import pandas as pd import numpy as np +import pandas as pd rand_between = lambda a, b, size: np.random.rand(*size) * (b - a) + a From 146981ea7bdd0850db882bddcd67fef9a4512fec Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 19:43:32 +0000 Subject: [PATCH 092/136] Start prediction plot --- gui/app.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index 95220f253..4e122198e 100644 --- a/gui/app.py +++ b/gui/app.py @@ -350,12 +350,17 @@ def main(): blocks = {**blocks, **_settings_layout()} with gr.Column(): - blocks["pareto"] = gr.Plot() + with gr.Tab("Pareto Front"): + blocks["pareto"] = gr.Plot() + with gr.Tab("Predictions"): + blocks["predictions_plot"] = gr.Plot() + blocks["df"] = gr.Dataframe( headers=["complexity", "loss", "equation"], datatype=["number", "number", "str"], wrap=True, - column_widths=[100, 100, 300], + column_widths=[75, 75, 200], + interactive=False, ) blocks["run"] = gr.Button() From e72c14ab6bea61f73f820941e6553275e036b573 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 20:21:55 +0000 Subject: [PATCH 093/136] Download IBM Plex Mono for GUI --- gui/Dockerfile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gui/Dockerfile b/gui/Dockerfile index 4a527a0f6..7f91d0c53 100644 --- a/gui/Dockerfile +++ b/gui/Dockerfile @@ -15,11 +15,17 @@ RUN apt-get update && \ pkg-config \ libfontconfig1 \ fontconfig \ + curl \ + unzip \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -COPY fonts/*.ttf /usr/local/share/fonts/ +# Install IBM Plex Mono font (so our plots look Gradio-style) +RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ + curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ + unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ + rm /tmp/IBM_Plex_Mono.zip RUN fc-cache -f -v WORKDIR /code From 8192cb803183f9fee76921ddd01dce4dc1e4e8d0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:15:22 +0000 Subject: [PATCH 094/136] Add missing readme --- gui/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 gui/README.md diff --git a/gui/README.md b/gui/README.md new file mode 100644 index 000000000..7a1ed37a6 --- /dev/null +++ b/gui/README.md @@ -0,0 +1,11 @@ +--- +title: PySR +emoji: 🌍 +colorFrom: green +colorTo: indigo +sdk: docker +pinned: false +license: apache-2.0 +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference From fc128f443564fbfb1a9e23bbefcb903797daab8c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:17:11 +0000 Subject: [PATCH 095/136] Move HF contents to main readme --- README.md | 10 ++++++++++ gui/README.md | 11 ----------- 2 files changed, 10 insertions(+), 11 deletions(-) delete mode 100644 gui/README.md diff --git a/README.md b/README.md index 8ee07b0b9..e7d61db69 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ +--- +title: PySR +emoji: 🌍 +colorFrom: green +colorTo: indigo +sdk: docker +pinned: false +license: apache-2.0 +--- + [//]: # (Logo:)
diff --git a/gui/README.md b/gui/README.md deleted file mode 100644 index 7a1ed37a6..000000000 --- a/gui/README.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: PySR -emoji: 🌍 -colorFrom: green -colorTo: indigo -sdk: docker -pinned: false -license: apache-2.0 ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference From 896066b92940793d6e35d3a1c113ce7be1b96b2a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:32:01 +0000 Subject: [PATCH 096/136] Bump jl version in container --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8b87b9256..b95391d52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # This builds a dockerfile containing a working copy of PySR # with all pre-requisites installed. -ARG JLVERSION=1.9.4 +ARG JLVERSION=1.10.0 ARG PYVERSION=3.11.6 ARG BASE_IMAGE=bullseye From 0430395f443351c601d262c22a8473df884d323b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:32:39 +0000 Subject: [PATCH 097/136] Have GUI be default mode of dockerfile --- Dockerfile | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b95391d52..3dcfcf7ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,24 @@ FROM python:${PYVERSION}-${BASE_IMAGE} COPY --from=jl /usr/local/julia /usr/local/julia ENV PATH="/usr/local/julia/bin:${PATH}" +# Install dependencies for GUI: +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git \ + libgl1-mesa-glx \ + libglib2.0-0 \ + libpython3-dev \ + libfreetype6-dev \ + pkg-config \ + libfontconfig1 \ + fontconfig \ + curl \ + unzip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + # Install IPython and other useful libraries: RUN pip install --no-cache-dir ipython matplotlib @@ -21,6 +39,18 @@ WORKDIR /pysr ADD ./requirements.txt /pysr/requirements.txt RUN pip3 install --no-cache-dir -r /pysr/requirements.txt +################################################################################ +## GUI setup +# Install IBM Plex Mono font (so our plots look Gradio-style) +RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ + curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ + unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ + rm /tmp/IBM_Plex_Mono.zip +RUN fc-cache -f -v +ADD ./gui/requirements.txt /pysr/gui/requirements.txt +RUN pip3 install --no-cache-dir -r /pysr/gui/requirements.txt +################################################################################ + # Install PySR: # We do a minimal copy so it doesn't need to rerun at every file change: ADD ./pyproject.toml /pysr/pyproject.toml @@ -31,9 +61,18 @@ RUN pip3 install --no-cache-dir . # Install Julia pre-requisites: RUN python3 -c 'import pysr' +EXPOSE 7860 +ENV GRADIO_ALLOW_FLAGGING=never \ + GRADIO_NUM_PORTS=1 \ + GRADIO_SERVER_NAME=0.0.0.0 \ + GRADIO_THEME=huggingface \ + SYSTEM=spaces + +ADD ./gui/app.py /pysr/gui/app.py + # metainformation LABEL org.opencontainers.image.authors = "Miles Cranmer" LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR" LABEL org.opencontainers.image.licenses = "Apache License 2.0" -CMD ["ipython"] +CMD ["python3", "/pysr/gui/app.py"] From a300bd3f35da42e2f7b2c2427e4031c20904e595 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:32:57 +0000 Subject: [PATCH 098/136] Remove example data generator --- gui/gen_example_data.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 gui/gen_example_data.py diff --git a/gui/gen_example_data.py b/gui/gen_example_data.py deleted file mode 100644 index 744b493d3..000000000 --- a/gui/gen_example_data.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np -import pandas as pd - -rand_between = lambda a, b, size: np.random.rand(*size) * (b - a) + a - -X = pd.DataFrame( - { - "T": rand_between(273, 373, (100,)), # Kelvin - "P": rand_between(100, 200, (100,)) * 1e3, # Pa - "n": rand_between(0, 10, (100,)), # mole - } -) - -R = 8.3144598 # J/mol/K -X["y"] = X["n"] * R * X["T"] / X["P"] - -X.to_csv("data.csv", index=False) From 754d45bbcb7504c28fa26bcb44f043fd96ea8394 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:33:20 +0000 Subject: [PATCH 099/136] Remove unused parts of gui --- gui/.gitignore | 1 - gui/.pre-commit-config.yaml | 33 ------------------- gui/Dockerfile | 64 ------------------------------------- 3 files changed, 98 deletions(-) delete mode 100644 gui/.gitignore delete mode 100644 gui/.pre-commit-config.yaml delete mode 100644 gui/Dockerfile diff --git a/gui/.gitignore b/gui/.gitignore deleted file mode 100644 index 074bb4715..000000000 --- a/gui/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.csv* diff --git a/gui/.pre-commit-config.yaml b/gui/.pre-commit-config.yaml deleted file mode 100644 index 95a9578f2..000000000 --- a/gui/.pre-commit-config.yaml +++ /dev/null @@ -1,33 +0,0 @@ -repos: - # General linting - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files - # General formatting - - repo: https://github.com/psf/black - rev: 23.12.1 - hooks: - - id: black - - id: black-jupyter - exclude: pysr/test/test_nb.ipynb - # Stripping notebooks - - repo: https://github.com/kynan/nbstripout - rev: 0.6.1 - hooks: - - id: nbstripout - exclude: pysr/test/test_nb.ipynb - # Unused imports - - repo: https://github.com/hadialqattan/pycln - rev: "v2.4.0" - hooks: - - id: pycln - # Sorted imports - - repo: https://github.com/PyCQA/isort - rev: "5.13.2" - hooks: - - id: isort - additional_dependencies: [toml] diff --git a/gui/Dockerfile b/gui/Dockerfile deleted file mode 100644 index 7f91d0c53..000000000 --- a/gui/Dockerfile +++ /dev/null @@ -1,64 +0,0 @@ -FROM julia:1.10.0 AS jl -FROM python:3.12 - -COPY --from=jl /usr/local/julia /usr/local/julia -ENV PATH="/usr/local/julia/bin:${PATH}" - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - git \ - libgl1-mesa-glx \ - libglib2.0-0 \ - libpython3-dev \ - libfreetype6-dev \ - pkg-config \ - libfontconfig1 \ - fontconfig \ - curl \ - unzip \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install IBM Plex Mono font (so our plots look Gradio-style) -RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ - curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ - unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ - rm /tmp/IBM_Plex_Mono.zip -RUN fc-cache -f -v - -WORKDIR /code - -COPY ./requirements.txt /code/requirements.txt - -# Set up a new user named "user" with user ID 1000 -RUN useradd -m -u 1000 user -# Switch to the "user" user -USER user -WORKDIR /home/user/ -# Set home to the user's home directory -ENV HOME=/home/user -ENV PATH=/home/user/.local/bin:$PATH - -RUN python -m venv /home/user/.venv - -# Install Python dependencies in a virtual environment -RUN /home/user/.venv/bin/python -m pip install --no-cache-dir --upgrade -r /code/requirements.txt - -# Install and pre-compile Julia dependencies, -# including the Bumper extension -RUN /home/user/.venv/bin/python -c "import pysr" -RUN /home/user/.venv/bin/python -c "import pysr; pysr.PySRRegressor(bumper=True, verbosity=0, progress=False, max_evals=1).fit([[1]], [1])" - -WORKDIR /home/user/app -COPY --chown=user . $HOME/app - -EXPOSE 7860 -ENV GRADIO_ALLOW_FLAGGING=never \ - GRADIO_NUM_PORTS=1 \ - GRADIO_SERVER_NAME=0.0.0.0 \ - GRADIO_THEME=huggingface \ - SYSTEM=spaces - -CMD ["/bin/bash", "-l", "-c", "/home/user/.venv/bin/python /home/user/app/app.py"] From 2b187fda154bc4dba32ab50f8d00807404948d08 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:35:01 +0000 Subject: [PATCH 100/136] Update gui requirements --- gui/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/requirements.txt b/gui/requirements.txt index 7d0b032c0..ffe53a2ce 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,4 +1,4 @@ -pysr==0.18.1 numpy pandas +matplotlib gradio From 177ae5afee519fd0c877ff65961dbc4f8459485a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:41:47 +0000 Subject: [PATCH 101/136] Minimize requirements --- Dockerfile | 39 +++++++-------------------------------- gui/requirements.txt | 2 -- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3dcfcf7ad..c151bc37f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,47 +12,22 @@ FROM python:${PYVERSION}-${BASE_IMAGE} COPY --from=jl /usr/local/julia /usr/local/julia ENV PATH="/usr/local/julia/bin:${PATH}" -# Install dependencies for GUI: -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - git \ - libgl1-mesa-glx \ - libglib2.0-0 \ - libpython3-dev \ - libfreetype6-dev \ - pkg-config \ - libfontconfig1 \ - fontconfig \ - curl \ - unzip \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Install IPython and other useful libraries: -RUN pip install --no-cache-dir ipython matplotlib +# Install font used for GUI +RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ + curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ + unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ + rm /tmp/IBM_Plex_Mono.zip +RUN fc-cache -f -v WORKDIR /pysr -# Caches install (https://stackoverflow.com/questions/25305788/how-to-avoid-reinstalling-packages-when-building-docker-image-for-python-project) +# Install all requirements, and then PySR itself ADD ./requirements.txt /pysr/requirements.txt RUN pip3 install --no-cache-dir -r /pysr/requirements.txt -################################################################################ -## GUI setup -# Install IBM Plex Mono font (so our plots look Gradio-style) -RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ - curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ - unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ - rm /tmp/IBM_Plex_Mono.zip -RUN fc-cache -f -v ADD ./gui/requirements.txt /pysr/gui/requirements.txt RUN pip3 install --no-cache-dir -r /pysr/gui/requirements.txt -################################################################################ -# Install PySR: -# We do a minimal copy so it doesn't need to rerun at every file change: ADD ./pyproject.toml /pysr/pyproject.toml ADD ./setup.py /pysr/setup.py ADD ./pysr /pysr/pysr diff --git a/gui/requirements.txt b/gui/requirements.txt index ffe53a2ce..a2f50f486 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,4 +1,2 @@ -numpy -pandas matplotlib gradio From 2665e2b8e6ec018b49bf3e38d30c133eaf1c1dcc Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:48:08 +0000 Subject: [PATCH 102/136] Clean up how fonts are set --- gui/app.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gui/app.py b/gui/app.py index 4e122198e..a122c2bf5 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,6 +1,8 @@ import multiprocessing as mp import os +import tempfile import time +from pathlib import Path import gradio as gr import numpy as np @@ -8,10 +10,15 @@ from matplotlib import pyplot as plt plt.ioff() -import tempfile -from pathlib import Path - -empty_df = pd.DataFrame( +plt.rcParams["font.family"] = [ + "IBM Plex Mono", + # Fallback fonts: + "DejaVu Sans Mono", + "Courier New", + "monospace", +] + +empty_df = lambda: pd.DataFrame( { "equation": [], "loss": [], @@ -69,17 +76,17 @@ def _greet_dispatch( df = pd.read_csv(file_input) if len(df) == 0: return ( - empty_df, + empty_df(), "The file is empty!", ) if len(df.columns) == 1: return ( - empty_df, + empty_df(), "The file has only one column!", ) if len(df) > 10_000 and not force_run: return ( - empty_df, + empty_df(), "You have uploaded a file with more than 10,000 rows. " "This will take very long to run. " "Please upload a subsample of the data, " @@ -417,7 +424,6 @@ def main(): def replot_pareto(df, maxsize): - plt.rcParams["font.family"] = "IBM Plex Mono" fig, ax = plt.subplots(figsize=(6, 6), dpi=100) if len(df) == 0 or "Equation" not in df.columns: From 6cb0a58598384e7c8702276b1a2bd36059c7a908 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 22:49:19 +0000 Subject: [PATCH 103/136] Set versions for matplotlib and gradio --- gui/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gui/requirements.txt b/gui/requirements.txt index a2f50f486..7c0b8f8df 100644 --- a/gui/requirements.txt +++ b/gui/requirements.txt @@ -1,2 +1,2 @@ -matplotlib -gradio +matplotlib>=3.0.0,<4.0.0 +gradio>=4.0.0,<5.0.0 From bccdea1c0cc8e8ff23d90bf7e08cf274145f6930 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sat, 30 Mar 2024 23:29:11 +0000 Subject: [PATCH 104/136] Fix permissions issue in docker GUI --- Dockerfile | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index c151bc37f..9a2ea0dba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,22 +19,36 @@ RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ rm /tmp/IBM_Plex_Mono.zip RUN fc-cache -f -v -WORKDIR /pysr +# Set up a new user named "user" with user ID 1000 +RUN useradd -m -u 1000 user +USER user +WORKDIR /home/user/ +ENV HOME=/home/user +ENV PATH=/home/user/.local/bin:$PATH + +RUN python -m venv $HOME/.venv + +ENV PYTHON="${HOME}/.venv/bin/python" +ENV PIP="${PYTHON} -m pip" + +WORKDIR $HOME/pysr # Install all requirements, and then PySR itself -ADD ./requirements.txt /pysr/requirements.txt -RUN pip3 install --no-cache-dir -r /pysr/requirements.txt +COPY --chown=user ./requirements.txt $HOME/pysr/requirements.txt +RUN $PIP install --no-cache-dir -r $HOME/pysr/requirements.txt -ADD ./gui/requirements.txt /pysr/gui/requirements.txt -RUN pip3 install --no-cache-dir -r /pysr/gui/requirements.txt +COPY --chown=user ./gui/requirements.txt $HOME/pysr/gui/requirements.txt +RUN $PIP install --no-cache-dir -r $HOME/pysr/gui/requirements.txt -ADD ./pyproject.toml /pysr/pyproject.toml -ADD ./setup.py /pysr/setup.py -ADD ./pysr /pysr/pysr -RUN pip3 install --no-cache-dir . +COPY --chown=user ./pyproject.toml $HOME/pysr/pyproject.toml +COPY --chown=user ./setup.py $HOME/pysr/setup.py +COPY --chown=user ./pysr $HOME/pysr/pysr +RUN $PIP install --no-cache-dir . # Install Julia pre-requisites: -RUN python3 -c 'import pysr' +RUN $PYTHON -c 'import pysr' + +COPY --chown=user ./gui/app.py $HOME/pysr/gui/app.py EXPOSE 7860 ENV GRADIO_ALLOW_FLAGGING=never \ @@ -43,11 +57,9 @@ ENV GRADIO_ALLOW_FLAGGING=never \ GRADIO_THEME=huggingface \ SYSTEM=spaces -ADD ./gui/app.py /pysr/gui/app.py - # metainformation LABEL org.opencontainers.image.authors = "Miles Cranmer" LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR" LABEL org.opencontainers.image.licenses = "Apache License 2.0" -CMD ["python3", "/pysr/gui/app.py"] +CMD ["/home/user/.venv/bin/python", "/home/user/pysr/gui/app.py"] From 7f0b93d21ac331cd9733ba02e4e2bcf721309b07 Mon Sep 17 00:00:00 2001 From: Miles Cranmer Date: Sun, 31 Mar 2024 14:51:28 +0100 Subject: [PATCH 105/136] Add virtualenv to PATH --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 9a2ea0dba..99b72414f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,6 +30,7 @@ RUN python -m venv $HOME/.venv ENV PYTHON="${HOME}/.venv/bin/python" ENV PIP="${PYTHON} -m pip" +ENV PATH="${HOME}/.venv/bin:${PATH}" WORKDIR $HOME/pysr From 9fa2182467eca2e9da584f7aa3066d8275577a7d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 06:16:31 +0100 Subject: [PATCH 106/136] Refactor GUI to multiple files --- gui/app.py | 254 +--------------------------------------------- gui/data.py | 22 ++++ gui/plots.py | 84 +++++++++++++++ gui/processing.py | 150 +++++++++++++++++++++++++++ 4 files changed, 260 insertions(+), 250 deletions(-) create mode 100644 gui/data.py create mode 100644 gui/plots.py create mode 100644 gui/processing.py diff --git a/gui/app.py b/gui/app.py index a122c2bf5..93d09f68c 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,184 +1,8 @@ -import multiprocessing as mp -import os -import tempfile -import time -from pathlib import Path - import gradio as gr -import numpy as np -import pandas as pd -from matplotlib import pyplot as plt - -plt.ioff() -plt.rcParams["font.family"] = [ - "IBM Plex Mono", - # Fallback fonts: - "DejaVu Sans Mono", - "Courier New", - "monospace", -] - -empty_df = lambda: pd.DataFrame( - { - "equation": [], - "loss": [], - "complexity": [], - } -) - -test_equations = ["sin(2*x)/x + 0.1*x"] - - -def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): - rstate = np.random.RandomState(data_seed) - x = rstate.uniform(-10, 10, num_points) - for k, v in { - "sin": "np.sin", - "cos": "np.cos", - "exp": "np.exp", - "log": "np.log", - "tan": "np.tan", - "^": "**", - }.items(): - s = s.replace(k, v) - y = eval(s) - noise = rstate.normal(0, noise_level, y.shape) - y_noisy = y + noise - return pd.DataFrame({"x": x}), y_noisy - - -def _greet_dispatch( - file_input, - force_run, - test_equation, - num_points, - noise_level, - data_seed, - niterations, - maxsize, - binary_operators, - unary_operators, - plot_update_delay, - parsimony, - populations, - population_size, - ncycles_per_iteration, - elementwise_loss, - adaptive_parsimony_scaling, - optimizer_algorithm, - optimizer_iterations, - batching, - batch_size, -): - """Load data, then spawn a process to run the greet function.""" - if file_input is not None: - # Look at some statistics of the file: - df = pd.read_csv(file_input) - if len(df) == 0: - return ( - empty_df(), - "The file is empty!", - ) - if len(df.columns) == 1: - return ( - empty_df(), - "The file has only one column!", - ) - if len(df) > 10_000 and not force_run: - return ( - empty_df(), - "You have uploaded a file with more than 10,000 rows. " - "This will take very long to run. " - "Please upload a subsample of the data, " - "or check the box 'Ignore Warnings'.", - ) - - col_to_fit = df.columns[-1] - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) - else: - X, y = generate_data(test_equation, num_points, noise_level, data_seed) - - with tempfile.TemporaryDirectory() as tmpdirname: - base = Path(tmpdirname) - equation_file = base / "hall_of_fame.csv" - equation_file_bkup = base / "hall_of_fame.csv.bkup" - process = mp.Process( - target=greet, - kwargs=dict( - X=X, - y=y, - niterations=niterations, - maxsize=maxsize, - binary_operators=binary_operators, - unary_operators=unary_operators, - equation_file=equation_file, - parsimony=parsimony, - populations=populations, - population_size=population_size, - ncycles_per_iteration=ncycles_per_iteration, - elementwise_loss=elementwise_loss, - adaptive_parsimony_scaling=adaptive_parsimony_scaling, - optimizer_algorithm=optimizer_algorithm, - optimizer_iterations=optimizer_iterations, - batching=batching, - batch_size=batch_size, - ), - ) - process.start() - last_yield_time = None - while process.is_alive(): - if equation_file_bkup.exists(): - try: - # First, copy the file to a the copy file - equation_file_copy = base / "hall_of_fame_copy.csv" - os.system(f"cp {equation_file_bkup} {equation_file_copy}") - equations = pd.read_csv(equation_file_copy) - # Ensure it is pareto dominated, with more complex expressions - # having higher loss. Otherwise remove those rows. - # TODO: Not sure why this occurs; could be the result of a late copy? - equations.sort_values("Complexity", ascending=True, inplace=True) - equations.reset_index(inplace=True) - bad_idx = [] - min_loss = None - for i in equations.index: - if min_loss is None or equations.loc[i, "Loss"] < min_loss: - min_loss = float(equations.loc[i, "Loss"]) - else: - bad_idx.append(i) - equations.drop(index=bad_idx, inplace=True) - - while ( - last_yield_time is not None - and time.time() - last_yield_time < plot_update_delay - ): - time.sleep(0.1) - - yield equations[["Complexity", "Loss", "Equation"]] - - last_yield_time = time.time() - except pd.errors.EmptyDataError: - pass - - process.join() - -def greet( - *, - X, - y, - **pysr_kwargs, -): - import pysr - - model = pysr.PySRRegressor( - progress=False, - timeout_in_seconds=1000, - **pysr_kwargs, - ) - model.fit(X, y) - - return 0 +from .data import test_equations +from .plots import replot, replot_pareto +from .processing import process def _data_layout(): @@ -372,7 +196,7 @@ def main(): blocks["run"] = gr.Button() blocks["run"].click( - _greet_dispatch, + process, inputs=[ blocks[k] for k in [ @@ -423,75 +247,5 @@ def main(): demo.launch(debug=True) -def replot_pareto(df, maxsize): - fig, ax = plt.subplots(figsize=(6, 6), dpi=100) - - if len(df) == 0 or "Equation" not in df.columns: - return fig - - # Plotting the data - ax.loglog( - df["Complexity"], - df["Loss"], - marker="o", - linestyle="-", - color="#333f48", - linewidth=1.5, - markersize=6, - ) - - # Set the axis limits - ax.set_xlim(0.5, maxsize + 1) - ytop = 2 ** (np.ceil(np.log2(df["Loss"].max()))) - ybottom = 2 ** (np.floor(np.log2(df["Loss"].min() + 1e-20))) - ax.set_ylim(ybottom, ytop) - - ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - - # Range-frame the plot - for direction in ["bottom", "left"]: - ax.spines[direction].set_position(("outward", 10)) - - # Delete far ticks - ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) - ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) - - ax.set_xlabel("Complexity") - ax.set_ylabel("Loss") - fig.tight_layout(pad=2) - - return fig - - -def replot(test_equation, num_points, noise_level, data_seed): - X, y = generate_data(test_equation, num_points, noise_level, data_seed) - x = X["x"] - - plt.rcParams["font.family"] = "IBM Plex Mono" - fig, ax = plt.subplots(figsize=(6, 6), dpi=100) - - ax.scatter(x, y, alpha=0.7, edgecolors="w", s=50) - - ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - - # Range-frame the plot - for direction in ["bottom", "left"]: - ax.spines[direction].set_position(("outward", 10)) - - # Delete far ticks - ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) - ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) - - ax.set_xlabel("x") - ax.set_ylabel("y") - fig.tight_layout(pad=2) - - return fig - - if __name__ == "__main__": main() diff --git a/gui/data.py b/gui/data.py new file mode 100644 index 000000000..81ad16b52 --- /dev/null +++ b/gui/data.py @@ -0,0 +1,22 @@ +import numpy as np +import pandas as pd + +test_equations = ["sin(2*x)/x + 0.1*x"] + + +def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): + rstate = np.random.RandomState(data_seed) + x = rstate.uniform(-10, 10, num_points) + for k, v in { + "sin": "np.sin", + "cos": "np.cos", + "exp": "np.exp", + "log": "np.log", + "tan": "np.tan", + "^": "**", + }.items(): + s = s.replace(k, v) + y = eval(s) + noise = rstate.normal(0, noise_level, y.shape) + y_noisy = y + noise + return pd.DataFrame({"x": x}), y_noisy diff --git a/gui/plots.py b/gui/plots.py new file mode 100644 index 000000000..e102b4dde --- /dev/null +++ b/gui/plots.py @@ -0,0 +1,84 @@ +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt + +plt.ioff() +plt.rcParams["font.family"] = [ + "IBM Plex Mono", + # Fallback fonts: + "DejaVu Sans Mono", + "Courier New", + "monospace", +] + +from .data import generate_data + + +def replot_pareto(df: pd.DataFrame, maxsize: int): + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + + if len(df) == 0 or "Equation" not in df.columns: + return fig + + # Plotting the data + ax.loglog( + df["Complexity"], + df["Loss"], + marker="o", + linestyle="-", + color="#333f48", + linewidth=1.5, + markersize=6, + ) + + # Set the axis limits + ax.set_xlim(0.5, maxsize + 1) + ytop = 2 ** (np.ceil(np.log2(df["Loss"].max()))) + ybottom = 2 ** (np.floor(np.log2(df["Loss"].min() + 1e-20))) + ax.set_ylim(ybottom, ytop) + + ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + + # Range-frame the plot + for direction in ["bottom", "left"]: + ax.spines[direction].set_position(("outward", 10)) + + # Delete far ticks + ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) + ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) + + ax.set_xlabel("Complexity") + ax.set_ylabel("Loss") + fig.tight_layout(pad=2) + + return fig + + +def replot(test_equation, num_points, noise_level, data_seed): + X, y = generate_data(test_equation, num_points, noise_level, data_seed) + x = X["x"] + + plt.rcParams["font.family"] = "IBM Plex Mono" + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + + ax.scatter(x, y, alpha=0.7, edgecolors="w", s=50) + + ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) + ax.spines["top"].set_visible(False) + ax.spines["right"].set_visible(False) + + # Range-frame the plot + for direction in ["bottom", "left"]: + ax.spines[direction].set_position(("outward", 10)) + + # Delete far ticks + ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) + ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) + + ax.set_xlabel("x") + ax.set_ylabel("y") + fig.tight_layout(pad=2) + + return fig diff --git a/gui/processing.py b/gui/processing.py new file mode 100644 index 000000000..e244a80d2 --- /dev/null +++ b/gui/processing.py @@ -0,0 +1,150 @@ +import multiprocessing as mp +import os +import tempfile +import time +from pathlib import Path + +import numpy as np +import pandas as pd + +from .data import generate_data + +EMPTY_DF = lambda: pd.DataFrame( + { + "Equation": [], + "Loss": [], + "Complexity": [], + } +) + + +def process( + file_input, + force_run, + test_equation, + num_points, + noise_level, + data_seed, + niterations, + maxsize, + binary_operators, + unary_operators, + plot_update_delay, + parsimony, + populations, + population_size, + ncycles_per_iteration, + elementwise_loss, + adaptive_parsimony_scaling, + optimizer_algorithm, + optimizer_iterations, + batching, + batch_size, +): + """Load data, then spawn a process to run the greet function.""" + if file_input is not None: + # Look at some statistics of the file: + df = pd.read_csv(file_input) + if len(df) == 0: + return ( + EMPTY_DF(), + "The file is empty!", + ) + if len(df.columns) == 1: + return ( + EMPTY_DF(), + "The file has only one column!", + ) + if len(df) > 10_000 and not force_run: + return ( + EMPTY_DF(), + "You have uploaded a file with more than 10,000 rows. " + "This will take very long to run. " + "Please upload a subsample of the data, " + "or check the box 'Ignore Warnings'.", + ) + + col_to_fit = df.columns[-1] + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) + else: + X, y = generate_data(test_equation, num_points, noise_level, data_seed) + + with tempfile.TemporaryDirectory() as tmpdirname: + base = Path(tmpdirname) + equation_file = base / "hall_of_fame.csv" + equation_file_bkup = base / "hall_of_fame.csv.bkup" + process = mp.Process( + target=pysr_fit, + kwargs=dict( + X=X, + y=y, + niterations=niterations, + maxsize=maxsize, + binary_operators=binary_operators, + unary_operators=unary_operators, + equation_file=equation_file, + parsimony=parsimony, + populations=populations, + population_size=population_size, + ncycles_per_iteration=ncycles_per_iteration, + elementwise_loss=elementwise_loss, + adaptive_parsimony_scaling=adaptive_parsimony_scaling, + optimizer_algorithm=optimizer_algorithm, + optimizer_iterations=optimizer_iterations, + batching=batching, + batch_size=batch_size, + ), + ) + process.start() + last_yield_time = None + while process.is_alive(): + if equation_file_bkup.exists(): + try: + # First, copy the file to a the copy file + equation_file_copy = base / "hall_of_fame_copy.csv" + os.system(f"cp {equation_file_bkup} {equation_file_copy}") + equations = pd.read_csv(equation_file_copy) + # Ensure it is pareto dominated, with more complex expressions + # having higher loss. Otherwise remove those rows. + # TODO: Not sure why this occurs; could be the result of a late copy? + equations.sort_values("Complexity", ascending=True, inplace=True) + equations.reset_index(inplace=True) + bad_idx = [] + min_loss = None + for i in equations.index: + if min_loss is None or equations.loc[i, "Loss"] < min_loss: + min_loss = float(equations.loc[i, "Loss"]) + else: + bad_idx.append(i) + equations.drop(index=bad_idx, inplace=True) + + while ( + last_yield_time is not None + and time.time() - last_yield_time < plot_update_delay + ): + time.sleep(0.1) + + yield equations[["Complexity", "Loss", "Equation"]] + + last_yield_time = time.time() + except pd.errors.EmptyDataError: + pass + + process.join() + + +def pysr_fit( + *, + X, + y, + **pysr_kwargs, +): + import pysr + + model = pysr.PySRRegressor( + progress=False, + timeout_in_seconds=1000, + **pysr_kwargs, + ) + model.fit(X, y) From 519fcb99b253c1db178da379bcef9f5e874e8608 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 06:22:00 +0100 Subject: [PATCH 107/136] Move more parts to other files --- gui/app.py | 4 ++-- gui/data.py | 22 ++++++++++++++++++++++ gui/processing.py | 32 ++++++-------------------------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/gui/app.py b/gui/app.py index 93d09f68c..f3e3abd64 100644 --- a/gui/app.py +++ b/gui/app.py @@ -2,7 +2,7 @@ from .data import test_equations from .plots import replot, replot_pareto -from .processing import process +from .processing import processing def _data_layout(): @@ -196,7 +196,7 @@ def main(): blocks["run"] = gr.Button() blocks["run"].click( - process, + processing, inputs=[ blocks[k] for k in [ diff --git a/gui/data.py b/gui/data.py index 81ad16b52..7ddf4f6ab 100644 --- a/gui/data.py +++ b/gui/data.py @@ -20,3 +20,25 @@ def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): noise = rstate.normal(0, noise_level, y.shape) y_noisy = y + noise return pd.DataFrame({"x": x}), y_noisy + + +def read_csv(file_input: str, force_run: bool): + # Look at some statistics of the file: + df = pd.read_csv(file_input) + if len(df) == 0: + raise ValueError("The file is empty!") + if len(df.columns) == 1: + raise ValueError("The file has only one column!") + if len(df) > 10_000 and not force_run: + raise ValueError( + "You have uploaded a file with more than 10,000 rows. " + "This will take very long to run. " + "Please upload a subsample of the data, " + "or check the box 'Ignore Warnings'.", + ) + + col_to_fit = df.columns[-1] + y = np.array(df[col_to_fit]) + X = df.drop([col_to_fit], axis=1) + + return X, y diff --git a/gui/processing.py b/gui/processing.py index e244a80d2..a846f8f5b 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from .data import generate_data +from .data import generate_data, read_csv EMPTY_DF = lambda: pd.DataFrame( { @@ -18,7 +18,7 @@ ) -def process( +def processing( file_input, force_run, test_equation, @@ -43,30 +43,10 @@ def process( ): """Load data, then spawn a process to run the greet function.""" if file_input is not None: - # Look at some statistics of the file: - df = pd.read_csv(file_input) - if len(df) == 0: - return ( - EMPTY_DF(), - "The file is empty!", - ) - if len(df.columns) == 1: - return ( - EMPTY_DF(), - "The file has only one column!", - ) - if len(df) > 10_000 and not force_run: - return ( - EMPTY_DF(), - "You have uploaded a file with more than 10,000 rows. " - "This will take very long to run. " - "Please upload a subsample of the data, " - "or check the box 'Ignore Warnings'.", - ) - - col_to_fit = df.columns[-1] - y = np.array(df[col_to_fit]) - X = df.drop([col_to_fit], axis=1) + try: + X, y = read_csv(file_input, force_run) + except ValueError as e: + return (EMPTY_DF(), str(e)) else: X, y = generate_data(test_equation, num_points, noise_level, data_seed) From 86e9755485e0af7911a90f6bc3b2f07dc061f6ed Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 06:41:36 +0100 Subject: [PATCH 108/136] Integrate gui in main codebase --- pysr/_cli/main.py | 6 ++++ {gui => pysr/gui}/app.py | 18 ++++++++---- {gui => pysr/gui}/data.py | 0 {gui => pysr/gui}/plots.py | 31 +++++++++++++------- {gui => pysr/gui}/processing.py | 0 gui/requirements.txt => requirements-gui.txt | 0 6 files changed, 39 insertions(+), 16 deletions(-) rename {gui => pysr/gui}/app.py (98%) rename {gui => pysr/gui}/data.py (100%) rename {gui => pysr/gui}/plots.py (84%) rename {gui => pysr/gui}/processing.py (100%) rename gui/requirements.txt => requirements-gui.txt (100%) diff --git a/pysr/_cli/main.py b/pysr/_cli/main.py index b27b7cedc..c9802ed05 100644 --- a/pysr/_cli/main.py +++ b/pysr/_cli/main.py @@ -5,6 +5,7 @@ import click +from ..gui import main as gui_main from ..test import ( get_runtests_cli, runtests, @@ -48,6 +49,11 @@ def _install(julia_project, quiet, precompile): ) +@pysr.command("gui", help="Start a Gradio-based GUI.") +def _gui(): + gui_main() + + TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"} diff --git a/gui/app.py b/pysr/gui/app.py similarity index 98% rename from gui/app.py rename to pysr/gui/app.py index f3e3abd64..640d54591 100644 --- a/gui/app.py +++ b/pysr/gui/app.py @@ -1,11 +1,17 @@ -import gradio as gr - from .data import test_equations from .plots import replot, replot_pareto from .processing import processing +def get_gr(): + import gradio as gr + + return gr + + def _data_layout(): + gr = get_gr() + with gr.Tab("Example Data"): # Plot of the example data: with gr.Row(): @@ -43,6 +49,8 @@ def _data_layout(): def _settings_layout(): + gr = get_gr() + with gr.Tab("Basic Settings"): binary_operators = gr.CheckboxGroup( choices=["+", "-", "*", "/", "^", "max", "min", "mod", "cond"], @@ -171,6 +179,8 @@ def _settings_layout(): def main(): + gr = get_gr() + blocks = {} with gr.Blocks() as demo: with gr.Row(): @@ -245,7 +255,3 @@ def main(): demo.load(replot, eqn_components, blocks["example_plot"]) demo.launch(debug=True) - - -if __name__ == "__main__": - main() diff --git a/gui/data.py b/pysr/gui/data.py similarity index 100% rename from gui/data.py rename to pysr/gui/data.py diff --git a/gui/plots.py b/pysr/gui/plots.py similarity index 84% rename from gui/plots.py rename to pysr/gui/plots.py index e102b4dde..ccbd5fd48 100644 --- a/gui/plots.py +++ b/pysr/gui/plots.py @@ -1,20 +1,30 @@ import numpy as np import pandas as pd -from matplotlib import pyplot as plt - -plt.ioff() -plt.rcParams["font.family"] = [ - "IBM Plex Mono", - # Fallback fonts: - "DejaVu Sans Mono", - "Courier New", - "monospace", -] from .data import generate_data +FIRST_LOAD = True + + +def get_plt(): + from matplotlib import pyplot as plt + + if FIRST_LOAD: + plt.ioff() + plt.rcParams["font.family"] = [ + "IBM Plex Mono", + # Fallback fonts: + "DejaVu Sans Mono", + "Courier New", + "monospace", + ] + + FIRST_LOAD = False + return plt + def replot_pareto(df: pd.DataFrame, maxsize: int): + plt = get_plt() fig, ax = plt.subplots(figsize=(6, 6), dpi=100) if len(df) == 0 or "Equation" not in df.columns: @@ -60,6 +70,7 @@ def replot(test_equation, num_points, noise_level, data_seed): X, y = generate_data(test_equation, num_points, noise_level, data_seed) x = X["x"] + plt = get_plt() plt.rcParams["font.family"] = "IBM Plex Mono" fig, ax = plt.subplots(figsize=(6, 6), dpi=100) diff --git a/gui/processing.py b/pysr/gui/processing.py similarity index 100% rename from gui/processing.py rename to pysr/gui/processing.py diff --git a/gui/requirements.txt b/requirements-gui.txt similarity index 100% rename from gui/requirements.txt rename to requirements-gui.txt From 985f8faa4da31e4d71f949809530fdc361f1c882 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 06:42:49 +0100 Subject: [PATCH 109/136] Revert "Integrate gui in main codebase" This reverts commit 86e9755485e0af7911a90f6bc3b2f07dc061f6ed. --- {pysr/gui => gui}/app.py | 18 ++++-------- {pysr/gui => gui}/data.py | 0 {pysr/gui => gui}/plots.py | 31 +++++++------------- {pysr/gui => gui}/processing.py | 0 requirements-gui.txt => gui/requirements.txt | 0 pysr/_cli/main.py | 6 ---- 6 files changed, 16 insertions(+), 39 deletions(-) rename {pysr/gui => gui}/app.py (98%) rename {pysr/gui => gui}/data.py (100%) rename {pysr/gui => gui}/plots.py (84%) rename {pysr/gui => gui}/processing.py (100%) rename requirements-gui.txt => gui/requirements.txt (100%) diff --git a/pysr/gui/app.py b/gui/app.py similarity index 98% rename from pysr/gui/app.py rename to gui/app.py index 640d54591..f3e3abd64 100644 --- a/pysr/gui/app.py +++ b/gui/app.py @@ -1,17 +1,11 @@ +import gradio as gr + from .data import test_equations from .plots import replot, replot_pareto from .processing import processing -def get_gr(): - import gradio as gr - - return gr - - def _data_layout(): - gr = get_gr() - with gr.Tab("Example Data"): # Plot of the example data: with gr.Row(): @@ -49,8 +43,6 @@ def _data_layout(): def _settings_layout(): - gr = get_gr() - with gr.Tab("Basic Settings"): binary_operators = gr.CheckboxGroup( choices=["+", "-", "*", "/", "^", "max", "min", "mod", "cond"], @@ -179,8 +171,6 @@ def _settings_layout(): def main(): - gr = get_gr() - blocks = {} with gr.Blocks() as demo: with gr.Row(): @@ -255,3 +245,7 @@ def main(): demo.load(replot, eqn_components, blocks["example_plot"]) demo.launch(debug=True) + + +if __name__ == "__main__": + main() diff --git a/pysr/gui/data.py b/gui/data.py similarity index 100% rename from pysr/gui/data.py rename to gui/data.py diff --git a/pysr/gui/plots.py b/gui/plots.py similarity index 84% rename from pysr/gui/plots.py rename to gui/plots.py index ccbd5fd48..e102b4dde 100644 --- a/pysr/gui/plots.py +++ b/gui/plots.py @@ -1,30 +1,20 @@ import numpy as np import pandas as pd +from matplotlib import pyplot as plt -from .data import generate_data - -FIRST_LOAD = True - +plt.ioff() +plt.rcParams["font.family"] = [ + "IBM Plex Mono", + # Fallback fonts: + "DejaVu Sans Mono", + "Courier New", + "monospace", +] -def get_plt(): - from matplotlib import pyplot as plt - - if FIRST_LOAD: - plt.ioff() - plt.rcParams["font.family"] = [ - "IBM Plex Mono", - # Fallback fonts: - "DejaVu Sans Mono", - "Courier New", - "monospace", - ] - - FIRST_LOAD = False - return plt +from .data import generate_data def replot_pareto(df: pd.DataFrame, maxsize: int): - plt = get_plt() fig, ax = plt.subplots(figsize=(6, 6), dpi=100) if len(df) == 0 or "Equation" not in df.columns: @@ -70,7 +60,6 @@ def replot(test_equation, num_points, noise_level, data_seed): X, y = generate_data(test_equation, num_points, noise_level, data_seed) x = X["x"] - plt = get_plt() plt.rcParams["font.family"] = "IBM Plex Mono" fig, ax = plt.subplots(figsize=(6, 6), dpi=100) diff --git a/pysr/gui/processing.py b/gui/processing.py similarity index 100% rename from pysr/gui/processing.py rename to gui/processing.py diff --git a/requirements-gui.txt b/gui/requirements.txt similarity index 100% rename from requirements-gui.txt rename to gui/requirements.txt diff --git a/pysr/_cli/main.py b/pysr/_cli/main.py index c9802ed05..b27b7cedc 100644 --- a/pysr/_cli/main.py +++ b/pysr/_cli/main.py @@ -5,7 +5,6 @@ import click -from ..gui import main as gui_main from ..test import ( get_runtests_cli, runtests, @@ -49,11 +48,6 @@ def _install(julia_project, quiet, precompile): ) -@pysr.command("gui", help="Start a Gradio-based GUI.") -def _gui(): - gui_main() - - TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"} From 967d63f5b8607aeb41fe97199a78db03a95c98a3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 06:47:19 +0100 Subject: [PATCH 110/136] Disable refresh animation in plots --- gui/app.py | 12 +++++++----- gui/plots.py | 2 +- gui/processing.py | 3 +-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/gui/app.py b/gui/app.py index f3e3abd64..cfb7fd3f1 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,8 +1,7 @@ import gradio as gr - -from .data import test_equations -from .plots import replot, replot_pareto -from .processing import processing +from data import test_equations +from plots import replot, replot_pareto +from processing import processing def _data_layout(): @@ -234,13 +233,16 @@ def main(): blocks["data_seed"], ] for eqn_component in eqn_components: - eqn_component.change(replot, eqn_components, blocks["example_plot"]) + eqn_component.change( + replot, eqn_components, blocks["example_plot"], show_progress=False + ) # Update plot when dataframe is updated: blocks["df"].change( replot_pareto, inputs=[blocks["df"], blocks["maxsize"]], outputs=[blocks["pareto"]], + show_progress=False, ) demo.load(replot, eqn_components, blocks["example_plot"]) diff --git a/gui/plots.py b/gui/plots.py index e102b4dde..77276dd4b 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -11,7 +11,7 @@ "monospace", ] -from .data import generate_data +from data import generate_data def replot_pareto(df: pd.DataFrame, maxsize: int): diff --git a/gui/processing.py b/gui/processing.py index a846f8f5b..6fa3e3fd1 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -6,8 +6,7 @@ import numpy as np import pandas as pd - -from .data import generate_data, read_csv +from data import generate_data, read_csv EMPTY_DF = lambda: pd.DataFrame( { From 0cd448aaf0843dee599bbd74a88049c96e2bd600 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 1 Apr 2024 07:11:13 +0100 Subject: [PATCH 111/136] Attempt to make PySR process a daemon --- gui/processing.py | 107 +++++++++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 40 deletions(-) diff --git a/gui/processing.py b/gui/processing.py index 6fa3e3fd1..bc014cfc4 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -4,7 +4,6 @@ import time from pathlib import Path -import numpy as np import pandas as pd from data import generate_data, read_csv @@ -17,6 +16,37 @@ ) +def pysr_fit(queue: mp.Queue, out_queue: mp.Queue): + import pysr + + while True: + # Get the arguments from the queue, if available + args = queue.get() + if args is None: + break + X = args["X"] + y = args["y"] + kwargs = args["kwargs"] + model = pysr.PySRRegressor( + progress=False, + timeout_in_seconds=1000, + **kwargs, + ) + model.fit(X, y) + out_queue.put(None) + + +class PySRProcess: + def __init__(self): + self.queue = mp.Queue() + self.out_queue = mp.Queue() + self.process = mp.Process(target=pysr_fit, args=(self.queue, self.out_queue)) + self.process.start() + + +PERSISTENT_WRITER = None + + def processing( file_input, force_run, @@ -41,6 +71,11 @@ def processing( batch_size, ): """Load data, then spawn a process to run the greet function.""" + global PERSISTENT_WRITER + if PERSISTENT_WRITER is None: + print("Starting PySR process") + PERSISTENT_WRITER = PySRProcess() + if file_input is not None: try: X, y = read_csv(file_input, force_run) @@ -53,31 +88,41 @@ def processing( base = Path(tmpdirname) equation_file = base / "hall_of_fame.csv" equation_file_bkup = base / "hall_of_fame.csv.bkup" - process = mp.Process( - target=pysr_fit, - kwargs=dict( + # Check if queue is empty, if not, kill the process + # and start a new one + if not PERSISTENT_WRITER.queue.empty(): + print("Restarting PySR process") + if PERSISTENT_WRITER.process.is_alive(): + PERSISTENT_WRITER.process.terminate() + PERSISTENT_WRITER.process.join() + + PERSISTENT_WRITER = PySRProcess() + # Write these to queue instead: + PERSISTENT_WRITER.queue.put( + dict( X=X, y=y, - niterations=niterations, - maxsize=maxsize, - binary_operators=binary_operators, - unary_operators=unary_operators, - equation_file=equation_file, - parsimony=parsimony, - populations=populations, - population_size=population_size, - ncycles_per_iteration=ncycles_per_iteration, - elementwise_loss=elementwise_loss, - adaptive_parsimony_scaling=adaptive_parsimony_scaling, - optimizer_algorithm=optimizer_algorithm, - optimizer_iterations=optimizer_iterations, - batching=batching, - batch_size=batch_size, - ), + kwargs=dict( + niterations=niterations, + maxsize=maxsize, + binary_operators=binary_operators, + unary_operators=unary_operators, + equation_file=equation_file, + parsimony=parsimony, + populations=populations, + population_size=population_size, + ncycles_per_iteration=ncycles_per_iteration, + elementwise_loss=elementwise_loss, + adaptive_parsimony_scaling=adaptive_parsimony_scaling, + optimizer_algorithm=optimizer_algorithm, + optimizer_iterations=optimizer_iterations, + batching=batching, + batch_size=batch_size, + ), + ) ) - process.start() last_yield_time = None - while process.is_alive(): + while PERSISTENT_WRITER.out_queue.empty(): if equation_file_bkup.exists(): try: # First, copy the file to a the copy file @@ -109,21 +154,3 @@ def processing( last_yield_time = time.time() except pd.errors.EmptyDataError: pass - - process.join() - - -def pysr_fit( - *, - X, - y, - **pysr_kwargs, -): - import pysr - - model = pysr.PySRRegressor( - progress=False, - timeout_in_seconds=1000, - **pysr_kwargs, - ) - model.fit(X, y) From c1a4fecf826b2bfa0e6e5b98b5b1eab94f31a68f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 03:28:55 +0100 Subject: [PATCH 112/136] Better names for plotting functions --- gui/app.py | 13 ++++++++----- gui/plots.py | 4 ++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/gui/app.py b/gui/app.py index cfb7fd3f1..e4e33f615 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,6 +1,6 @@ import gradio as gr from data import test_equations -from plots import replot, replot_pareto +from plots import plot_example_data, plot_pareto_curve from processing import processing @@ -225,7 +225,7 @@ def main(): outputs=blocks["df"], ) - # Any update to the equation choice will trigger a replot: + # Any update to the equation choice will trigger a plot_example_data: eqn_components = [ blocks["test_equation"], blocks["num_points"], @@ -234,17 +234,20 @@ def main(): ] for eqn_component in eqn_components: eqn_component.change( - replot, eqn_components, blocks["example_plot"], show_progress=False + plot_example_data, + eqn_components, + blocks["example_plot"], + show_progress=False, ) # Update plot when dataframe is updated: blocks["df"].change( - replot_pareto, + plot_pareto_curve, inputs=[blocks["df"], blocks["maxsize"]], outputs=[blocks["pareto"]], show_progress=False, ) - demo.load(replot, eqn_components, blocks["example_plot"]) + demo.load(plot_example_data, eqn_components, blocks["example_plot"]) demo.launch(debug=True) diff --git a/gui/plots.py b/gui/plots.py index 77276dd4b..ff700c57d 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -14,7 +14,7 @@ from data import generate_data -def replot_pareto(df: pd.DataFrame, maxsize: int): +def plot_pareto_curve(df: pd.DataFrame, maxsize: int): fig, ax = plt.subplots(figsize=(6, 6), dpi=100) if len(df) == 0 or "Equation" not in df.columns: @@ -56,7 +56,7 @@ def replot_pareto(df: pd.DataFrame, maxsize: int): return fig -def replot(test_equation, num_points, noise_level, data_seed): +def plot_example_data(test_equation, num_points, noise_level, data_seed): X, y = generate_data(test_equation, num_points, noise_level, data_seed) x = X["x"] From c52d151b9bc5460e35780c95a73cbf17dcbfeaf1 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 03:31:57 +0100 Subject: [PATCH 113/136] Standardize figure settings --- gui/plots.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/gui/plots.py b/gui/plots.py index ff700c57d..4130a1a41 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -20,7 +20,6 @@ def plot_pareto_curve(df: pd.DataFrame, maxsize: int): if len(df) == 0 or "Equation" not in df.columns: return fig - # Plotting the data ax.loglog( df["Complexity"], df["Loss"], @@ -31,23 +30,12 @@ def plot_pareto_curve(df: pd.DataFrame, maxsize: int): markersize=6, ) - # Set the axis limits ax.set_xlim(0.5, maxsize + 1) ytop = 2 ** (np.ceil(np.log2(df["Loss"].max()))) ybottom = 2 ** (np.floor(np.log2(df["Loss"].min() + 1e-20))) ax.set_ylim(ybottom, ytop) - ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) - ax.spines["top"].set_visible(False) - ax.spines["right"].set_visible(False) - - # Range-frame the plot - for direction in ["bottom", "left"]: - ax.spines[direction].set_position(("outward", 10)) - - # Delete far ticks - ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) - ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) + stylize_axis(ax) ax.set_xlabel("Complexity") ax.set_ylabel("Loss") @@ -57,14 +45,23 @@ def plot_pareto_curve(df: pd.DataFrame, maxsize: int): def plot_example_data(test_equation, num_points, noise_level, data_seed): + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + X, y = generate_data(test_equation, num_points, noise_level, data_seed) x = X["x"] - plt.rcParams["font.family"] = "IBM Plex Mono" - fig, ax = plt.subplots(figsize=(6, 6), dpi=100) - ax.scatter(x, y, alpha=0.7, edgecolors="w", s=50) + stylize_axis(ax) + + ax.set_xlabel("x") + ax.set_ylabel("y") + fig.tight_layout(pad=2) + + return fig + + +def stylize_axis(ax): ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) ax.spines["top"].set_visible(False) ax.spines["right"].set_visible(False) @@ -76,9 +73,3 @@ def plot_example_data(test_equation, num_points, noise_level, data_seed): # Delete far ticks ax.tick_params(axis="both", which="major", labelsize=10, direction="out", length=5) ax.tick_params(axis="both", which="minor", labelsize=8, direction="out", length=3) - - ax.set_xlabel("x") - ax.set_ylabel("y") - fig.tight_layout(pad=2) - - return fig From 77013109562ba4c447ffb75d308827a2ee78c127 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 03:58:30 +0100 Subject: [PATCH 114/136] Allow control of file loading verbosity --- pysr/sr.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pysr/sr.py b/pysr/sr.py index 1b24a9c94..fd371b303 100644 --- a/pysr/sr.py +++ b/pysr/sr.py @@ -903,6 +903,7 @@ def from_file( feature_names_in=None, selection_mask=None, nout=1, + verbosity=1, **pysr_kwargs, ): """ @@ -932,6 +933,8 @@ def from_file( Number of outputs of the model. Not needed if loading from a pickle file. Default is `1`. + verbosity : int + What verbosity level to use. 0 means minimal print statements. **pysr_kwargs : dict Any other keyword arguments to initialize the PySRRegressor object. These will overwrite those stored in the pickle file. @@ -946,9 +949,11 @@ def from_file( pkl_filename = _csv_filename_to_pkl_filename(equation_file) # Try to load model from .pkl - print(f"Checking if {pkl_filename} exists...") + if verbosity > 0: + print(f"Checking if {pkl_filename} exists...") if os.path.exists(pkl_filename): - print(f"Loading model from {pkl_filename}") + if verbosity > 0: + print(f"Loading model from {pkl_filename}") assert binary_operators is None assert unary_operators is None assert n_features_in is None @@ -968,10 +973,11 @@ def from_file( return model # Else, we re-create it. - print( - f"{pkl_filename} does not exist, " - "so we must create the model from scratch." - ) + if verbosity > 0: + print( + f"{pkl_filename} does not exist, " + "so we must create the model from scratch." + ) assert binary_operators is not None or unary_operators is not None assert n_features_in is not None From fd28328f14f65375f14c051a2fc9bb6429606344 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 04:01:08 +0100 Subject: [PATCH 115/136] wip on predictions from equations --- gui/processing.py | 96 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/gui/processing.py b/gui/processing.py index bc014cfc4..2098bef9d 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -36,6 +36,42 @@ def pysr_fit(queue: mp.Queue, out_queue: mp.Queue): out_queue.put(None) +def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): + import numpy as np + + import pysr + + while True: + args = queue.get() + + if args is None: + break + + X = args["X"] + equation_file = str(args["equation_file"]) + complexity = args["complexity"] + + equation_file_pkl = equation_file.replace(".csv", ".pkl") + equation_file_bkup = equation_file + ".bkup" + + equation_file_copy = equation_file.replace(".csv", "_copy.csv") + equation_file_pkl_copy = equation_file.replace(".csv", "_copy.pkl") + + # TODO: See if there is way to get lock on file + os.system(f"cp {equation_file_bkup} {equation_file_copy}") + os.system(f"cp {equation_file_pkl} {equation_file_pkl_copy}") + + try: + model = pysr.PySRRegressor.from_file(equation_file_pkl_copy, verbosity=0) + except pd.errors.EmptyDataError: + continue + + index = np.abs(model.equations_.complexity - complexity).argmin + ypred = model.predict(X, index) + + out_queue.put(ypred) + + class PySRProcess: def __init__(self): self.queue = mp.Queue() @@ -44,6 +80,16 @@ def __init__(self): self.process.start() +class PySRReaderProcess: + def __init__(self): + self.queue = mp.Queue() + self.out_queue = mp.Queue() + self.process = mp.Process( + target=pysr_predict, args=(self.queue, self.out_queue) + ) + self.process.start() + + PERSISTENT_WRITER = None @@ -121,36 +167,30 @@ def processing( ), ) ) - last_yield_time = None while PERSISTENT_WRITER.out_queue.empty(): if equation_file_bkup.exists(): + # First, copy the file to a the copy file + equation_file_copy = base / "hall_of_fame_copy.csv" + os.system(f"cp {equation_file_bkup} {equation_file_copy}") try: - # First, copy the file to a the copy file - equation_file_copy = base / "hall_of_fame_copy.csv" - os.system(f"cp {equation_file_bkup} {equation_file_copy}") equations = pd.read_csv(equation_file_copy) - # Ensure it is pareto dominated, with more complex expressions - # having higher loss. Otherwise remove those rows. - # TODO: Not sure why this occurs; could be the result of a late copy? - equations.sort_values("Complexity", ascending=True, inplace=True) - equations.reset_index(inplace=True) - bad_idx = [] - min_loss = None - for i in equations.index: - if min_loss is None or equations.loc[i, "Loss"] < min_loss: - min_loss = float(equations.loc[i, "Loss"]) - else: - bad_idx.append(i) - equations.drop(index=bad_idx, inplace=True) - - while ( - last_yield_time is not None - and time.time() - last_yield_time < plot_update_delay - ): - time.sleep(0.1) - - yield equations[["Complexity", "Loss", "Equation"]] - - last_yield_time = time.time() except pd.errors.EmptyDataError: - pass + continue + + # Ensure it is pareto dominated, with more complex expressions + # having higher loss. Otherwise remove those rows. + # TODO: Not sure why this occurs; could be the result of a late copy? + equations.sort_values("Complexity", ascending=True, inplace=True) + equations.reset_index(inplace=True) + bad_idx = [] + min_loss = None + for i in equations.index: + if min_loss is None or equations.loc[i, "Loss"] < min_loss: + min_loss = float(equations.loc[i, "Loss"]) + else: + bad_idx.append(i) + equations.drop(index=bad_idx, inplace=True) + + yield equations[["Complexity", "Loss", "Equation"]] + + time.sleep(0.1) From a2492c36c5c3fe1eef3ed1736b184e488a43a3e3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 03:02:50 -0400 Subject: [PATCH 116/136] Better support for live reading and predictions --- gui/processing.py | 162 +++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 82 deletions(-) diff --git a/gui/processing.py b/gui/processing.py index 2098bef9d..13f7151ac 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -3,6 +3,7 @@ import tempfile import time from pathlib import Path +from typing import Callable import pandas as pd from data import generate_data, read_csv @@ -37,8 +38,6 @@ def pysr_fit(queue: mp.Queue, out_queue: mp.Queue): def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): - import numpy as np - import pysr while True: @@ -49,7 +48,7 @@ def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): X = args["X"] equation_file = str(args["equation_file"]) - complexity = args["complexity"] + index = args["index"] equation_file_pkl = equation_file.replace(".csv", ".pkl") equation_file_bkup = equation_file + ".bkup" @@ -66,31 +65,29 @@ def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): except pd.errors.EmptyDataError: continue - index = np.abs(model.equations_.complexity - complexity).argmin ypred = model.predict(X, index) - out_queue.put(ypred) + # Rename the columns to uppercase + equations = model.equations_[["complexity", "loss", "equation"]].copy() + # Remove any row that has worse loss than previous row: + equations = equations[equations["loss"].cummin() == equations["loss"]] + # TODO: Why is this needed? Are rows not being removed? -class PySRProcess: - def __init__(self): - self.queue = mp.Queue() - self.out_queue = mp.Queue() - self.process = mp.Process(target=pysr_fit, args=(self.queue, self.out_queue)) - self.process.start() + equations.columns = ["Complexity", "Loss", "Equation"] + out_queue.put(dict(ypred=ypred, equations=equations)) -class PySRReaderProcess: - def __init__(self): - self.queue = mp.Queue() - self.out_queue = mp.Queue() - self.process = mp.Process( - target=pysr_predict, args=(self.queue, self.out_queue) - ) +class ProcessWrapper: + def __init__(self, target: Callable[[mp.Queue, mp.Queue], None]): + self.queue = mp.Queue(maxsize=1) + self.out_queue = mp.Queue(maxsize=1) + self.process = mp.Process(target=target, args=(self.queue, self.out_queue)) self.process.start() PERSISTENT_WRITER = None +PERSISTENT_READER = None def processing( @@ -118,9 +115,15 @@ def processing( ): """Load data, then spawn a process to run the greet function.""" global PERSISTENT_WRITER + global PERSISTENT_READER + if PERSISTENT_WRITER is None: - print("Starting PySR process") - PERSISTENT_WRITER = PySRProcess() + print("Starting PySR fit process") + PERSISTENT_WRITER = ProcessWrapper(pysr_fit) + + if PERSISTENT_READER is None: + print("Starting PySR predict process") + PERSISTENT_READER = ProcessWrapper(pysr_predict) if file_input is not None: try: @@ -130,67 +133,62 @@ def processing( else: X, y = generate_data(test_equation, num_points, noise_level, data_seed) - with tempfile.TemporaryDirectory() as tmpdirname: - base = Path(tmpdirname) - equation_file = base / "hall_of_fame.csv" - equation_file_bkup = base / "hall_of_fame.csv.bkup" - # Check if queue is empty, if not, kill the process - # and start a new one - if not PERSISTENT_WRITER.queue.empty(): - print("Restarting PySR process") - if PERSISTENT_WRITER.process.is_alive(): - PERSISTENT_WRITER.process.terminate() - PERSISTENT_WRITER.process.join() - - PERSISTENT_WRITER = PySRProcess() - # Write these to queue instead: - PERSISTENT_WRITER.queue.put( - dict( - X=X, - y=y, - kwargs=dict( - niterations=niterations, - maxsize=maxsize, - binary_operators=binary_operators, - unary_operators=unary_operators, + tmpdirname = tempfile.mkdtemp() + base = Path(tmpdirname) + equation_file = base / "hall_of_fame.csv" + # Check if queue is empty, if not, kill the process + # and start a new one + if not PERSISTENT_WRITER.queue.empty(): + print("Restarting PySR fit process") + if PERSISTENT_WRITER.process.is_alive(): + PERSISTENT_WRITER.process.terminate() + PERSISTENT_WRITER.process.join() + + PERSISTENT_WRITER = ProcessWrapper(pysr_fit) + + if not PERSISTENT_READER.queue.empty(): + print("Restarting PySR predict process") + if PERSISTENT_READER.process.is_alive(): + PERSISTENT_READER.process.terminate() + PERSISTENT_READER.process.join() + + PERSISTENT_READER = ProcessWrapper(pysr_predict) + + PERSISTENT_WRITER.queue.put( + dict( + X=X, + y=y, + kwargs=dict( + niterations=niterations, + maxsize=maxsize, + binary_operators=binary_operators, + unary_operators=unary_operators, + equation_file=equation_file, + parsimony=parsimony, + populations=populations, + population_size=population_size, + ncycles_per_iteration=ncycles_per_iteration, + elementwise_loss=elementwise_loss, + adaptive_parsimony_scaling=adaptive_parsimony_scaling, + optimizer_algorithm=optimizer_algorithm, + optimizer_iterations=optimizer_iterations, + batching=batching, + batch_size=batch_size, + ), + ) + ) + while PERSISTENT_WRITER.out_queue.empty(): + if equation_file.exists(): + # First, copy the file to a the copy file + PERSISTENT_READER.queue.put( + dict( + X=X, equation_file=equation_file, - parsimony=parsimony, - populations=populations, - population_size=population_size, - ncycles_per_iteration=ncycles_per_iteration, - elementwise_loss=elementwise_loss, - adaptive_parsimony_scaling=adaptive_parsimony_scaling, - optimizer_algorithm=optimizer_algorithm, - optimizer_iterations=optimizer_iterations, - batching=batching, - batch_size=batch_size, - ), + index=-1, + ) ) - ) - while PERSISTENT_WRITER.out_queue.empty(): - if equation_file_bkup.exists(): - # First, copy the file to a the copy file - equation_file_copy = base / "hall_of_fame_copy.csv" - os.system(f"cp {equation_file_bkup} {equation_file_copy}") - try: - equations = pd.read_csv(equation_file_copy) - except pd.errors.EmptyDataError: - continue - - # Ensure it is pareto dominated, with more complex expressions - # having higher loss. Otherwise remove those rows. - # TODO: Not sure why this occurs; could be the result of a late copy? - equations.sort_values("Complexity", ascending=True, inplace=True) - equations.reset_index(inplace=True) - bad_idx = [] - min_loss = None - for i in equations.index: - if min_loss is None or equations.loc[i, "Loss"] < min_loss: - min_loss = float(equations.loc[i, "Loss"]) - else: - bad_idx.append(i) - equations.drop(index=bad_idx, inplace=True) - - yield equations[["Complexity", "Loss", "Equation"]] - - time.sleep(0.1) + out = PERSISTENT_READER.out_queue.get() + equations = out["equations"] + yield equations[["Complexity", "Loss", "Equation"]] + + time.sleep(0.1) From 84b46accd731ae32326d88049b126c427645d12c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Tue, 2 Apr 2024 03:11:13 -0400 Subject: [PATCH 117/136] Working prediction plot --- gui/app.py | 3 ++- gui/plots.py | 14 ++++++++++++++ gui/processing.py | 6 +++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/gui/app.py b/gui/app.py index e4e33f615..e0b6f3e9a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -222,7 +222,8 @@ def main(): "batch_size", ] ], - outputs=blocks["df"], + outputs=[blocks["df"], blocks["predictions_plot"]], + show_progress=True, ) # Any update to the equation choice will trigger a plot_example_data: diff --git a/gui/plots.py b/gui/plots.py index 4130a1a41..bce093357 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -61,6 +61,20 @@ def plot_example_data(test_equation, num_points, noise_level, data_seed): return fig +def plot_predictions(y, ypred): + fig, ax = plt.subplots(figsize=(6, 6), dpi=100) + + ax.scatter(y, ypred, alpha=0.7, edgecolors="w", s=50) + + stylize_axis(ax) + + ax.set_xlabel("true") + ax.set_ylabel("prediction") + fig.tight_layout(pad=2) + + return fig + + def stylize_axis(ax): ax.grid(True, which="both", ls="--", linewidth=0.5, color="gray", alpha=0.5) ax.spines["top"].set_visible(False) diff --git a/gui/processing.py b/gui/processing.py index 13f7151ac..8e381d2c3 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -7,6 +7,7 @@ import pandas as pd from data import generate_data, read_csv +from plots import plot_predictions EMPTY_DF = lambda: pd.DataFrame( { @@ -188,7 +189,10 @@ def processing( ) ) out = PERSISTENT_READER.out_queue.get() + predictions = out["ypred"] equations = out["equations"] - yield equations[["Complexity", "Loss", "Equation"]] + yield equations[["Complexity", "Loss", "Equation"]], plot_predictions( + y, predictions + ) time.sleep(0.1) From ef7aada96b7cbe153691da2b58c1deded1d430cc Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 17 Apr 2024 18:11:13 +0100 Subject: [PATCH 118/136] style(gui): add default gui for config --- gui/app.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gui/app.py b/gui/app.py index e0b6f3e9a..17ba8b726 100644 --- a/gui/app.py +++ b/gui/app.py @@ -3,6 +3,8 @@ from plots import plot_example_data, plot_pareto_curve from processing import processing +GLOBAL_SETTINGS = dict(theme="default") + def _data_layout(): with gr.Tab("Example Data"): @@ -170,8 +172,9 @@ def _settings_layout(): def main(): + global GLOBAL_SETTINGS blocks = {} - with gr.Blocks() as demo: + with gr.Blocks(**GLOBAL_SETTINGS) as demo: with gr.Row(): with gr.Column(): with gr.Row(): From a206d6a6c11bd2d87706fffec1c5c59d81acd9f0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 17 Apr 2024 20:13:44 +0100 Subject: [PATCH 119/136] refactor(gui): gradio to use object oriented wrapper --- gui/app.py | 298 +++++++++++++++++++++++++--------------------- gui/data.py | 2 +- gui/processing.py | 2 + 3 files changed, 165 insertions(+), 137 deletions(-) diff --git a/gui/app.py b/gui/app.py index 17ba8b726..a1abef75f 100644 --- a/gui/app.py +++ b/gui/app.py @@ -1,56 +1,78 @@ +from collections import OrderedDict + import gradio as gr -from data import test_equations +import numpy as np +from data import TEST_EQUATIONS +from gradio.components.base import Component from plots import plot_example_data, plot_pareto_curve from processing import processing -GLOBAL_SETTINGS = dict(theme="default") - -def _data_layout(): - with gr.Tab("Example Data"): - # Plot of the example data: +class ExampleData: + def __init__(self, demo: gr.Blocks) -> None: with gr.Row(): + # Plot of the example data: with gr.Column(): - example_plot = gr.Plot() + self.example_plot = gr.Plot() with gr.Column(): - test_equation = gr.Radio( - test_equations, value=test_equations[0], label="Test Equation" + self.test_equation = gr.Radio( + TEST_EQUATIONS, value=TEST_EQUATIONS[0], label="Test Equation" ) - num_points = gr.Slider( + self.num_points = gr.Slider( minimum=10, maximum=1000, value=200, label="Number of Data Points", step=1, ) - noise_level = gr.Slider( + self.noise_level = gr.Slider( minimum=0, maximum=1, value=0.05, label="Noise Level" ) - data_seed = gr.Number(value=0, label="Random Seed") - with gr.Tab("Upload Data"): - file_input = gr.File(label="Upload a CSV File") - gr.Markdown( + self.data_seed = gr.Number(value=0, label="Random Seed") + + # Set up plotting: + + eqn_components = [ + self.test_equation, + self.num_points, + self.noise_level, + self.data_seed, + ] + for eqn_component in eqn_components: + eqn_component.change( + plot_example_data, + eqn_components, + self.example_plot, + show_progress=False, + ) + + demo.load(plot_example_data, eqn_components, self.example_plot) + + +class UploadData: + def __init__(self) -> None: + self.file_input = gr.File(label="Upload a CSV File") + self.label = gr.Markdown( "The rightmost column of your CSV file will be used as the target variable." ) - return dict( - file_input=file_input, - test_equation=test_equation, - num_points=num_points, - noise_level=noise_level, - data_seed=data_seed, - example_plot=example_plot, - ) + +class Data: + def __init__(self, demo: gr.Blocks) -> None: + with gr.Tab("Example Data"): + self.example_data = ExampleData(demo) + with gr.Tab("Upload Data"): + self.upload_data = UploadData() -def _settings_layout(): - with gr.Tab("Basic Settings"): - binary_operators = gr.CheckboxGroup( +class BasicSettings: + def __init__(self) -> None: + self.binary_operators = gr.CheckboxGroup( choices=["+", "-", "*", "/", "^", "max", "min", "mod", "cond"], label="Binary Operators", value=["+", "-", "*", "/"], ) - unary_operators = gr.CheckboxGroup( + self.unary_operators = gr.CheckboxGroup( choices=[ "sin", "cos", @@ -69,58 +91,61 @@ def _settings_layout(): label="Unary Operators", value=["sin"], ) - niterations = gr.Slider( + self.niterations = gr.Slider( minimum=1, maximum=1000, value=40, label="Number of Iterations", step=1, ) - maxsize = gr.Slider( + self.maxsize = gr.Slider( minimum=7, maximum=100, value=20, label="Maximum Complexity", step=1, ) - parsimony = gr.Number( + self.parsimony = gr.Number( value=0.0032, label="Parsimony Coefficient", ) - with gr.Tab("Advanced Settings"): - populations = gr.Slider( + + +class AdvancedSettings: + def __init__(self) -> None: + self.populations = gr.Slider( minimum=2, maximum=100, value=15, label="Number of Populations", step=1, ) - population_size = gr.Slider( + self.population_size = gr.Slider( minimum=2, maximum=1000, value=33, label="Population Size", step=1, ) - ncycles_per_iteration = gr.Number( + self.ncycles_per_iteration = gr.Number( value=550, label="Cycles per Iteration", ) - elementwise_loss = gr.Radio( + self.elementwise_loss = gr.Radio( ["L2DistLoss()", "L1DistLoss()", "LogitDistLoss()", "HuberLoss()"], value="L2DistLoss()", label="Loss Function", ) - adaptive_parsimony_scaling = gr.Number( + self.adaptive_parsimony_scaling = gr.Number( value=20.0, label="Adaptive Parsimony Scaling", ) - optimizer_algorithm = gr.Radio( + self.optimizer_algorithm = gr.Radio( ["BFGS", "NelderMead"], value="BFGS", label="Optimizer Algorithm", ) - optimizer_iterations = gr.Slider( + self.optimizer_iterations = gr.Slider( minimum=1, maximum=100, value=8, @@ -128,11 +153,11 @@ def _settings_layout(): step=1, ) # Bool: - batching = gr.Checkbox( + self.batching = gr.Checkbox( value=False, label="Batching", ) - batch_size = gr.Slider( + self.batch_size = gr.Slider( minimum=2, maximum=1000, value=50, @@ -140,121 +165,122 @@ def _settings_layout(): step=1, ) - with gr.Tab("Gradio Settings"): - plot_update_delay = gr.Slider( + +class GradioSettings: + def __init__(self) -> None: + self.plot_update_delay = gr.Slider( minimum=1, maximum=100, value=3, label="Plot Update Delay", ) - force_run = gr.Checkbox( + self.force_run = gr.Checkbox( value=False, label="Ignore Warnings", ) - return dict( - binary_operators=binary_operators, - unary_operators=unary_operators, - niterations=niterations, - maxsize=maxsize, - force_run=force_run, - plot_update_delay=plot_update_delay, - parsimony=parsimony, - populations=populations, - population_size=population_size, - ncycles_per_iteration=ncycles_per_iteration, - elementwise_loss=elementwise_loss, - adaptive_parsimony_scaling=adaptive_parsimony_scaling, - optimizer_algorithm=optimizer_algorithm, - optimizer_iterations=optimizer_iterations, - batching=batching, - batch_size=batch_size, - ) - - -def main(): - global GLOBAL_SETTINGS - blocks = {} - with gr.Blocks(**GLOBAL_SETTINGS) as demo: + + +class Settings: + def __init__(self): + with gr.Tab("Basic Settings"): + self.basic_settings = BasicSettings() + with gr.Tab("Advanced Settings"): + self.advanced_settings = AdvancedSettings() + with gr.Tab("Gradio Settings"): + self.gradio_settings = GradioSettings() + + +class Results: + def __init__(self): + with gr.Tab("Pareto Front"): + self.pareto = gr.Plot() + with gr.Tab("Predictions"): + self.predictions_plot = gr.Plot() + + self.df = gr.Dataframe( + headers=["complexity", "loss", "equation"], + datatype=["number", "number", "str"], + wrap=True, + column_widths=[75, 75, 200], + interactive=False, + ) + + +def flatten_attributes(component_group, absolute_name: str, d=None) -> OrderedDict: + if d is None: + d = OrderedDict() + + if not hasattr(component_group, "__dict__"): + return d + + for name, elem in component_group.__dict__.items(): + new_absolute_name = absolute_name + "." + name + if name.startswith("_"): + # Private attribute + continue + elif elem in component_group.__dict__.values(): + # Don't duplicate any tiems + continue + elif isinstance(elem, Component): + # Only add components to dict + d[new_absolute_name] = elem + else: + d = flatten_attributes(elem, new_absolute_name, d=d) + + return d + + +class AppInterface: + def __init__(self, demo: gr.Blocks) -> None: with gr.Row(): with gr.Column(): with gr.Row(): - blocks = {**blocks, **_data_layout()} + self.data = Data(demo) with gr.Row(): - blocks = {**blocks, **_settings_layout()} - + self.settings = Settings() with gr.Column(): - with gr.Tab("Pareto Front"): - blocks["pareto"] = gr.Plot() - with gr.Tab("Predictions"): - blocks["predictions_plot"] = gr.Plot() - - blocks["df"] = gr.Dataframe( - headers=["complexity", "loss", "equation"], - datatype=["number", "number", "str"], - wrap=True, - column_widths=[75, 75, 200], - interactive=False, - ) - blocks["run"] = gr.Button() - - blocks["run"].click( - processing, - inputs=[ - blocks[k] - for k in [ - "file_input", - "force_run", - "test_equation", - "num_points", - "noise_level", - "data_seed", - "niterations", - "maxsize", - "binary_operators", - "unary_operators", - "plot_update_delay", - "parsimony", - "populations", - "population_size", - "ncycles_per_iteration", - "elementwise_loss", - "adaptive_parsimony_scaling", - "optimizer_algorithm", - "optimizer_iterations", - "batching", - "batch_size", - ] - ], - outputs=[blocks["df"], blocks["predictions_plot"]], - show_progress=True, - ) - - # Any update to the equation choice will trigger a plot_example_data: - eqn_components = [ - blocks["test_equation"], - blocks["num_points"], - blocks["noise_level"], - blocks["data_seed"], - ] - for eqn_component in eqn_components: - eqn_component.change( - plot_example_data, - eqn_components, - blocks["example_plot"], - show_progress=False, - ) + self.results = Results() + self.run = gr.Button() # Update plot when dataframe is updated: - blocks["df"].change( + self.results.df.change( plot_pareto_curve, - inputs=[blocks["df"], blocks["maxsize"]], - outputs=[blocks["pareto"]], + inputs=[self.results.df, self.settings.basic_settings.maxsize], + outputs=[self.results.pareto], show_progress=False, ) - demo.load(plot_example_data, eqn_components, blocks["example_plot"]) - demo.launch(debug=True) + self.run.click( + create_processing_function(self, ignore=["df", "predictions_plot"]), + inputs=list(flatten_attributes(self, "interface").values()), + outputs=[self.results.df, self.results.predictions_plot], + show_progress=True, + ) + + +def create_processing_function(interface: AppInterface, ignore=[]): + d = flatten_attributes(interface, "interface") + keys = [k.split(".")[-1] for k in d.keys()] + keys = [k for k in keys if k not in ignore] + _, idx, counts = np.unique(keys, return_index=True, return_counts=True) + if np.any(counts > 1): + raise AssertionError("Bad keys: " + ",".join(np.array(keys)[idx[counts > 1]])) + + def f(components): + n = len(components) + assert n == len(keys) + return processing(**{keys[i]: components[i] for i in range(n)}) + + return f + + +class App: + def __init__(self, theme="default") -> None: + with gr.Blocks(theme=theme) as demo: + self.interface = AppInterface(demo) + + demo.launch(debug=True) if __name__ == "__main__": - main() + app = App() diff --git a/gui/data.py b/gui/data.py index 7ddf4f6ab..02a6d1424 100644 --- a/gui/data.py +++ b/gui/data.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -test_equations = ["sin(2*x)/x + 0.1*x"] +TEST_EQUATIONS = ["sin(2*x)/x + 0.1*x"] def generate_data(s: str, num_points: int, noise_level: float, data_seed: int): diff --git a/gui/processing.py b/gui/processing.py index 8e381d2c3..df1c0d38b 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -92,6 +92,7 @@ def __init__(self, target: Callable[[mp.Queue, mp.Queue], None]): def processing( + *, file_input, force_run, test_equation, @@ -113,6 +114,7 @@ def processing( optimizer_iterations, batching, batch_size, + **kwargs, ): """Load data, then spawn a process to run the greet function.""" global PERSISTENT_WRITER From 89fd8076998e9863f65492d95446b2f0c5715794 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 17 Apr 2024 20:39:54 +0100 Subject: [PATCH 120/136] refactor(gui): fix issues with big refactor --- gui/app.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/gui/app.py b/gui/app.py index a1abef75f..5c5a81b71 100644 --- a/gui/app.py +++ b/gui/app.py @@ -206,10 +206,9 @@ def __init__(self): ) -def flatten_attributes(component_group, absolute_name: str, d=None) -> OrderedDict: - if d is None: - d = OrderedDict() - +def flatten_attributes( + component_group, absolute_name: str, d: OrderedDict +) -> OrderedDict: if not hasattr(component_group, "__dict__"): return d @@ -218,14 +217,14 @@ def flatten_attributes(component_group, absolute_name: str, d=None) -> OrderedDi if name.startswith("_"): # Private attribute continue - elif elem in component_group.__dict__.values(): + elif elem in d.values(): # Don't duplicate any tiems continue elif isinstance(elem, Component): # Only add components to dict d[new_absolute_name] = elem else: - d = flatten_attributes(elem, new_absolute_name, d=d) + flatten_attributes(elem, new_absolute_name, d) return d @@ -250,26 +249,35 @@ def __init__(self, demo: gr.Blocks) -> None: show_progress=False, ) + ignore = ["df", "predictions_plot"] self.run.click( - create_processing_function(self, ignore=["df", "predictions_plot"]), - inputs=list(flatten_attributes(self, "interface").values()), + create_processing_function(self, ignore=ignore), + inputs=[ + v + for k, v in flatten_attributes(self, "interface", OrderedDict()).items() + if last_part(k) not in ignore + ], outputs=[self.results.df, self.results.predictions_plot], show_progress=True, ) +def last_part(k: str) -> str: + return k.split(".")[-1] + + def create_processing_function(interface: AppInterface, ignore=[]): - d = flatten_attributes(interface, "interface") - keys = [k.split(".")[-1] for k in d.keys()] - keys = [k for k in keys if k not in ignore] + d = flatten_attributes(interface, "interface", OrderedDict()) + keys = [k for k in map(last_part, d.keys()) if k not in ignore] _, idx, counts = np.unique(keys, return_index=True, return_counts=True) if np.any(counts > 1): raise AssertionError("Bad keys: " + ",".join(np.array(keys)[idx[counts > 1]])) - def f(components): + def f(*components): n = len(components) assert n == len(keys) - return processing(**{keys[i]: components[i] for i in range(n)}) + for output in processing(**{keys[i]: components[i] for i in range(n)}): + yield output return f From 260808892588f46eac49639fe8b80d994cd0e249 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Wed, 17 Apr 2024 20:42:01 +0100 Subject: [PATCH 121/136] refactor(gui): launch code --- gui/app.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gui/app.py b/gui/app.py index 5c5a81b71..9f16f09f3 100644 --- a/gui/app.py +++ b/gui/app.py @@ -282,13 +282,11 @@ def f(*components): return f -class App: - def __init__(self, theme="default") -> None: - with gr.Blocks(theme=theme) as demo: - self.interface = AppInterface(demo) - - demo.launch(debug=True) +def main(): + with gr.Blocks(theme="default") as demo: + _ = AppInterface(demo) + demo.launch(debug=True) if __name__ == "__main__": - app = App() + main() From 2139b62cd86dd178bc91f6f860f755d4337184d3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 18 Apr 2024 22:35:42 -0400 Subject: [PATCH 122/136] refactor(gui): remove redundant Row --- gui/app.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/gui/app.py b/gui/app.py index 9f16f09f3..746f6152a 100644 --- a/gui/app.py +++ b/gui/app.py @@ -10,25 +10,23 @@ class ExampleData: def __init__(self, demo: gr.Blocks) -> None: - with gr.Row(): - # Plot of the example data: - with gr.Column(): - self.example_plot = gr.Plot() - with gr.Column(): - self.test_equation = gr.Radio( - TEST_EQUATIONS, value=TEST_EQUATIONS[0], label="Test Equation" - ) - self.num_points = gr.Slider( - minimum=10, - maximum=1000, - value=200, - label="Number of Data Points", - step=1, - ) - self.noise_level = gr.Slider( - minimum=0, maximum=1, value=0.05, label="Noise Level" - ) - self.data_seed = gr.Number(value=0, label="Random Seed") + with gr.Column(): + self.example_plot = gr.Plot() + with gr.Column(): + self.test_equation = gr.Radio( + TEST_EQUATIONS, value=TEST_EQUATIONS[0], label="Test Equation" + ) + self.num_points = gr.Slider( + minimum=10, + maximum=1000, + value=200, + label="Number of Data Points", + step=1, + ) + self.noise_level = gr.Slider( + minimum=0, maximum=1, value=0.05, label="Noise Level" + ) + self.data_seed = gr.Number(value=0, label="Random Seed") # Set up plotting: From 06338fc5546c4fb7e9da701814798047cdd1017f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 18 Apr 2024 23:03:21 -0400 Subject: [PATCH 123/136] refactor(gui): avoid compiling from two processes at once --- gui/app.py | 1 - gui/processing.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/gui/app.py b/gui/app.py index 746f6152a..b24bc44e4 100644 --- a/gui/app.py +++ b/gui/app.py @@ -150,7 +150,6 @@ def __init__(self) -> None: label="Optimizer Iterations", step=1, ) - # Bool: self.batching = gr.Checkbox( value=False, label="Batching", diff --git a/gui/processing.py b/gui/processing.py index df1c0d38b..392e8a938 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -39,8 +39,6 @@ def pysr_fit(queue: mp.Queue, out_queue: mp.Queue): def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): - import pysr - while True: args = queue.get() @@ -61,6 +59,10 @@ def pysr_predict(queue: mp.Queue, out_queue: mp.Queue): os.system(f"cp {equation_file_bkup} {equation_file_copy}") os.system(f"cp {equation_file_pkl} {equation_file_pkl_copy}") + # Note that we import pysr late in this process to avoid + # pre-compiling the code in two places at once + import pysr + try: model = pysr.PySRRegressor.from_file(equation_file_pkl_copy, verbosity=0) except pd.errors.EmptyDataError: @@ -181,7 +183,10 @@ def processing( ) ) while PERSISTENT_WRITER.out_queue.empty(): - if equation_file.exists(): + if ( + equation_file.exists() + and Path(str(equation_file).replace(".csv", ".pkl")).exists() + ): # First, copy the file to a the copy file PERSISTENT_READER.queue.put( dict( From ed67e70191116d0bed615c322646a1accb49cec2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Thu, 18 Apr 2024 23:45:59 -0400 Subject: [PATCH 124/136] fix(gui): add all gui files to Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 99b72414f..2f79cd21e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,7 +49,7 @@ RUN $PIP install --no-cache-dir . # Install Julia pre-requisites: RUN $PYTHON -c 'import pysr' -COPY --chown=user ./gui/app.py $HOME/pysr/gui/app.py +COPY --chown=user ./gui/*.py $HOME/pysr/gui/ EXPOSE 7860 ENV GRADIO_ALLOW_FLAGGING=never \ From 28639ea5f98f6aeaeb5405886909afbaafcd6346 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Fri, 19 Apr 2024 00:07:42 -0400 Subject: [PATCH 125/136] feat(gui): add status messages --- gui/app.py | 10 ++++++++-- gui/processing.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gui/app.py b/gui/app.py index b24bc44e4..e143d0229 100644 --- a/gui/app.py +++ b/gui/app.py @@ -202,6 +202,8 @@ def __init__(self): interactive=False, ) + self.messages = gr.Textbox(label="Messages", value="", interactive=False) + def flatten_attributes( component_group, absolute_name: str, d: OrderedDict @@ -246,7 +248,7 @@ def __init__(self, demo: gr.Blocks) -> None: show_progress=False, ) - ignore = ["df", "predictions_plot"] + ignore = ["df", "predictions_plot", "pareto", "messages"] self.run.click( create_processing_function(self, ignore=ignore), inputs=[ @@ -254,7 +256,11 @@ def __init__(self, demo: gr.Blocks) -> None: for k, v in flatten_attributes(self, "interface", OrderedDict()).items() if last_part(k) not in ignore ], - outputs=[self.results.df, self.results.predictions_plot], + outputs=[ + self.results.df, + self.results.predictions_plot, + self.results.messages, + ], show_progress=True, ) diff --git a/gui/processing.py b/gui/processing.py index 392e8a938..f1553a1c8 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -182,6 +182,15 @@ def processing( ), ) ) + + last_yield = ( + pd.DataFrame({"Complexity": [], "Loss": [], "Equation": []}), + plot_predictions([], []), + "Started!", + ) + + yield last_yield + while PERSISTENT_WRITER.out_queue.empty(): if ( equation_file.exists() @@ -198,8 +207,13 @@ def processing( out = PERSISTENT_READER.out_queue.get() predictions = out["ypred"] equations = out["equations"] - yield equations[["Complexity", "Loss", "Equation"]], plot_predictions( - y, predictions + last_yield = ( + equations[["Complexity", "Loss", "Equation"]], + plot_predictions(y, predictions), + "Running...", ) + yield last_yield time.sleep(0.1) + + yield (*last_yield[:-1], "Done") From b58f1db30dcd6ad5cecfb5826dc6096a11151e71 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Fri, 19 Apr 2024 00:21:38 -0400 Subject: [PATCH 126/136] feat(gui): reset plot at run --- gui/processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gui/processing.py b/gui/processing.py index f1553a1c8..073185d87 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -134,7 +134,7 @@ def processing( try: X, y = read_csv(file_input, force_run) except ValueError as e: - return (EMPTY_DF(), str(e)) + return (EMPTY_DF(), plot_predictions([], []), str(e)) else: X, y = generate_data(test_equation, num_points, noise_level, data_seed) From b955f863a9a6d06ba832187f71c56f8414b6effe Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Fri, 19 Apr 2024 00:24:47 -0400 Subject: [PATCH 127/136] feat(gui): add stop button --- gui/app.py | 4 ++- gui/plots.py | 3 +++ gui/processing.py | 67 +++++++++++++++++++++++++++++------------------ 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/gui/app.py b/gui/app.py index e143d0229..aca40ff8c 100644 --- a/gui/app.py +++ b/gui/app.py @@ -5,7 +5,7 @@ from data import TEST_EQUATIONS from gradio.components.base import Component from plots import plot_example_data, plot_pareto_curve -from processing import processing +from processing import processing, stop class ExampleData: @@ -239,6 +239,7 @@ def __init__(self, demo: gr.Blocks) -> None: with gr.Column(): self.results = Results() self.run = gr.Button() + self.stop = gr.Button(value="Stop") # Update plot when dataframe is updated: self.results.df.change( @@ -263,6 +264,7 @@ def __init__(self, demo: gr.Blocks) -> None: ], show_progress=True, ) + self.stop.click(stop) def last_part(k: str) -> str: diff --git a/gui/plots.py b/gui/plots.py index bce093357..f133bf5a1 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -1,3 +1,5 @@ +import logging + import numpy as np import pandas as pd from matplotlib import pyplot as plt @@ -10,6 +12,7 @@ "Courier New", "monospace", ] +logging.getLogger("matplotlib.font_manager").disabled = True from data import generate_data diff --git a/gui/processing.py b/gui/processing.py index 073185d87..c767f1645 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Callable +import numpy as np import pandas as pd from data import generate_data, read_csv from plots import plot_predictions @@ -89,8 +90,11 @@ def __init__(self, target: Callable[[mp.Queue, mp.Queue], None]): self.process.start() -PERSISTENT_WRITER = None -PERSISTENT_READER = None +ACTIVE_PROCESS = None + + +def _random_string(): + return "".join(list(np.random.choice("abcdefghijklmnopqrstuvwxyz".split(), 16))) def processing( @@ -118,17 +122,17 @@ def processing( batch_size, **kwargs, ): - """Load data, then spawn a process to run the greet function.""" - global PERSISTENT_WRITER - global PERSISTENT_READER + # random string: + global ACTIVE_PROCESS + cur_process = _random_string() + ACTIVE_PROCESS = cur_process - if PERSISTENT_WRITER is None: - print("Starting PySR fit process") - PERSISTENT_WRITER = ProcessWrapper(pysr_fit) + """Load data, then spawn a process to run the greet function.""" + print("Starting PySR fit process") + writer = ProcessWrapper(pysr_fit) - if PERSISTENT_READER is None: - print("Starting PySR predict process") - PERSISTENT_READER = ProcessWrapper(pysr_predict) + print("Starting PySR predict process") + reader = ProcessWrapper(pysr_predict) if file_input is not None: try: @@ -143,23 +147,23 @@ def processing( equation_file = base / "hall_of_fame.csv" # Check if queue is empty, if not, kill the process # and start a new one - if not PERSISTENT_WRITER.queue.empty(): + if not writer.queue.empty(): print("Restarting PySR fit process") - if PERSISTENT_WRITER.process.is_alive(): - PERSISTENT_WRITER.process.terminate() - PERSISTENT_WRITER.process.join() + if writer.process.is_alive(): + writer.process.terminate() + writer.process.join() - PERSISTENT_WRITER = ProcessWrapper(pysr_fit) + writer = ProcessWrapper(pysr_fit) - if not PERSISTENT_READER.queue.empty(): + if not reader.queue.empty(): print("Restarting PySR predict process") - if PERSISTENT_READER.process.is_alive(): - PERSISTENT_READER.process.terminate() - PERSISTENT_READER.process.join() + if reader.process.is_alive(): + reader.process.terminate() + reader.process.join() - PERSISTENT_READER = ProcessWrapper(pysr_predict) + reader = ProcessWrapper(pysr_predict) - PERSISTENT_WRITER.queue.put( + writer.queue.put( dict( X=X, y=y, @@ -191,20 +195,20 @@ def processing( yield last_yield - while PERSISTENT_WRITER.out_queue.empty(): + while writer.out_queue.empty(): if ( equation_file.exists() and Path(str(equation_file).replace(".csv", ".pkl")).exists() ): # First, copy the file to a the copy file - PERSISTENT_READER.queue.put( + reader.queue.put( dict( X=X, equation_file=equation_file, index=-1, ) ) - out = PERSISTENT_READER.out_queue.get() + out = reader.out_queue.get() predictions = out["ypred"] equations = out["equations"] last_yield = ( @@ -214,6 +218,19 @@ def processing( ) yield last_yield + if cur_process != ACTIVE_PROCESS: + # Kill both reader and writer + writer.process.terminate() + reader.process.terminate() + return + time.sleep(0.1) yield (*last_yield[:-1], "Done") + return + + +def stop(): + global ACTIVE_PROCESS + ACTIVE_PROCESS = None + return From e4dfed6f9b9d383b2515028ab42cc7a71d57f6c2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Fri, 19 Apr 2024 01:10:55 -0400 Subject: [PATCH 128/136] refactor(gui): put buttons in same row --- gui/app.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gui/app.py b/gui/app.py index aca40ff8c..dee4680f4 100644 --- a/gui/app.py +++ b/gui/app.py @@ -10,9 +10,9 @@ class ExampleData: def __init__(self, demo: gr.Blocks) -> None: - with gr.Column(): + with gr.Column(scale=1): self.example_plot = gr.Plot() - with gr.Column(): + with gr.Column(scale=1): self.test_equation = gr.Radio( TEST_EQUATIONS, value=TEST_EQUATIONS[0], label="Test Equation" ) @@ -231,15 +231,18 @@ def flatten_attributes( class AppInterface: def __init__(self, demo: gr.Blocks) -> None: with gr.Row(): - with gr.Column(): + with gr.Column(scale=2): with gr.Row(): self.data = Data(demo) with gr.Row(): self.settings = Settings() - with gr.Column(): + with gr.Column(scale=2): self.results = Results() - self.run = gr.Button() - self.stop = gr.Button(value="Stop") + with gr.Row(): + with gr.Column(scale=1): + self.stop = gr.Button(value="Stop") + with gr.Column(scale=1, min_width=200): + self.run = gr.Button() # Update plot when dataframe is updated: self.results.df.change( From 0f7799eb503a0f2ab1979207248616dc14c49f8f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Fri, 19 Apr 2024 01:11:52 -0400 Subject: [PATCH 129/136] fix(gui): kill processes not terminate --- gui/processing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gui/processing.py b/gui/processing.py index c767f1645..6da3cd145 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -220,13 +220,14 @@ def processing( if cur_process != ACTIVE_PROCESS: # Kill both reader and writer - writer.process.terminate() - reader.process.terminate() + writer.process.kill() + reader.process.kill() + yield (*last_yield[:-1], "Stopped.") return time.sleep(0.1) - yield (*last_yield[:-1], "Done") + yield (*last_yield[:-1], "Done.") return From a1c360e45e7ebbc2a60e973bdbfb9cb44b6ba267 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 8 Jul 2024 23:28:27 +0100 Subject: [PATCH 130/136] deps(gui): create `pysr[gui]` with dependencies --- gui/requirements.txt | 2 -- pyproject.toml | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) delete mode 100644 gui/requirements.txt diff --git a/gui/requirements.txt b/gui/requirements.txt deleted file mode 100644 index 7c0b8f8df..000000000 --- a/gui/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -matplotlib>=3.0.0,<4.0.0 -gradio>=4.0.0,<5.0.0 diff --git a/pyproject.toml b/pyproject.toml index e5230cde2..55c5e3df8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,12 @@ classifiers = [ ] dynamic = ["dependencies"] +[project.optional-dependencies] +gui = [ + "matplotlib>=3.0.0,<4.0.0", + "gradio>=4.0.0,<5.0.0", +] + [tool.setuptools] packages = ["pysr", "pysr._cli", "pysr.test"] include-package-data = false From 09714d69e8fb5ba09c8b9cacc15368ee083ee04a Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 8 Jul 2024 23:29:42 +0100 Subject: [PATCH 131/136] style(gui): ruff lint --- gui/processing.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/gui/processing.py b/gui/processing.py index 6da3cd145..74947bfc6 100644 --- a/gui/processing.py +++ b/gui/processing.py @@ -10,13 +10,15 @@ from data import generate_data, read_csv from plots import plot_predictions -EMPTY_DF = lambda: pd.DataFrame( - { - "Equation": [], - "Loss": [], - "Complexity": [], - } -) + +def empty_df(): + return pd.DataFrame( + { + "Equation": [], + "Loss": [], + "Complexity": [], + } + ) def pysr_fit(queue: mp.Queue, out_queue: mp.Queue): @@ -138,7 +140,7 @@ def processing( try: X, y = read_csv(file_input, force_run) except ValueError as e: - return (EMPTY_DF(), plot_predictions([], []), str(e)) + return (empty_df(), plot_predictions([], []), str(e)) else: X, y = generate_data(test_equation, num_points, noise_level, data_seed) From 1c7143e44356c77362c2cadf8f78e6c2fe45fdb8 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 8 Jul 2024 23:35:19 +0100 Subject: [PATCH 132/136] style: sort deps --- pyproject.toml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 55c5e3df8..6a63b45a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,8 +21,8 @@ dynamic = ["dependencies"] [project.optional-dependencies] gui = [ - "matplotlib>=3.0.0,<4.0.0", "gradio>=4.0.0,<5.0.0", + "matplotlib>=3.0.0,<4.0.0", ] [tool.setuptools] @@ -38,14 +38,14 @@ profile = "black" [tool.rye] dev-dependencies = [ - "pre-commit>=3.7.0", - "ipython>=8.23.0", + "coverage>=7.5.3", "ipykernel>=6.29.4", - "mypy>=1.10.0", + "ipython>=8.23.0", "jax[cpu]>=0.4.26", - "torch>=2.3.0", + "mypy>=1.10.0", "pandas-stubs>=2.2.1.240316", - "types-pytz>=2024.1.0.20240417", + "pre-commit>=3.7.0", + "torch>=2.3.0", "types-openpyxl>=3.1.0.20240428", - "coverage>=7.5.3", + "types-pytz>=2024.1.0.20240417", ] From f508651b66c0747831589629be2df05918fbd5f3 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 8 Jul 2024 23:52:56 +0100 Subject: [PATCH 133/136] build: make `juliapkg` operate dynamically rather than with json file --- pysr/julia_import.py | 10 ++++++++++ pysr/juliapkg.json | 13 ------------- requirements.txt | 1 + 3 files changed, 11 insertions(+), 13 deletions(-) delete mode 100644 pysr/juliapkg.json diff --git a/pysr/julia_import.py b/pysr/julia_import.py index 0e032bee1..88ca9a124 100644 --- a/pysr/julia_import.py +++ b/pysr/julia_import.py @@ -4,6 +4,8 @@ from types import ModuleType from typing import cast +import juliapkg + # Check if JuliaCall is already loaded, and if so, warn the user # about the relevant environment variables. If not loaded, # set up sensible defaults. @@ -36,6 +38,14 @@ ): os.environ[k] = os.environ.get(k, default) +juliapkg.require_julia("~1.6.7, ~1.7, ~1.8, ~1.9, =1.10.0, ^1.10.3") +juliapkg.add( + "SymbolicRegression", + "8254be44-1295-4e6a-a16d-46603ac705cb", + version="=0.24.5", +) +juliapkg.add("Serialization", "9e88b42a-f829-5b0c-bbe9-9e923198166b", version="1") + autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS") if autoload_extensions is not None: diff --git a/pysr/juliapkg.json b/pysr/juliapkg.json deleted file mode 100644 index 045d79e30..000000000 --- a/pysr/juliapkg.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "julia": "~1.6.7, ~1.7, ~1.8, ~1.9, =1.10.0, ^1.10.3", - "packages": { - "SymbolicRegression": { - "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb", - "version": "=0.24.5" - }, - "Serialization": { - "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b", - "version": "1" - } - } -} diff --git a/requirements.txt b/requirements.txt index 230f67dce..1f7c104b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ sympy>=1.0.0,<2.0.0 pandas>=0.21.0,<3.0.0 numpy>=1.13.0,<3.0.0 scikit_learn>=1.0.0,<2.0.0 +juliapkg==0.1.13 juliacall==0.9.20 click>=7.0.0,<9.0.0 setuptools>=50.0.0 From c1721e177be85230b58352d6d4f23c1c7859a28d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 15 Jul 2024 16:10:03 +0100 Subject: [PATCH 134/136] fix(gui): dockerfile not installing gui dependencies --- Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4f76542b2..4ba2a2e44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,13 +38,10 @@ WORKDIR $HOME/pysr COPY --chown=user ./requirements.txt $HOME/pysr/requirements.txt RUN $PIP install --no-cache-dir -r $HOME/pysr/requirements.txt -COPY --chown=user ./gui/requirements.txt $HOME/pysr/gui/requirements.txt -RUN $PIP install --no-cache-dir -r $HOME/pysr/gui/requirements.txt - COPY --chown=user ./pyproject.toml $HOME/pysr/pyproject.toml COPY --chown=user ./setup.py $HOME/pysr/setup.py COPY --chown=user ./pysr $HOME/pysr/pysr -RUN $PIP install --no-cache-dir . +RUN $PIP install --no-cache-dir ".[gui]" # Install Julia pre-requisites: RUN $PYTHON -c 'import pysr' From c035a231acfdb1c58d516125b1ff8cc05ded1767 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 15 Jul 2024 22:34:07 +0100 Subject: [PATCH 135/136] hack(gui): temporarily disable IBM Plex Mono for HuggingFace --- Dockerfile | 12 ++++++------ gui/plots.py | 18 ++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4ba2a2e44..7dcafb340 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,12 +12,12 @@ FROM python:${PYVERSION}-${BASE_IMAGE} COPY --from=jl /usr/local/julia /usr/local/julia ENV PATH="/usr/local/julia/bin:${PATH}" -# Install font used for GUI -RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ - curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ - unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ - rm /tmp/IBM_Plex_Mono.zip -RUN fc-cache -f -v +# # Install font used for GUI +# RUN mkdir -p /usr/local/share/fonts/IBM_Plex_Mono && \ +# curl -L https://github.com/IBM/plex/releases/download/v6.4.0/IBM-Plex-Mono.zip -o /tmp/IBM_Plex_Mono.zip && \ +# unzip /tmp/IBM_Plex_Mono.zip -d /usr/local/share/fonts/IBM_Plex_Mono && \ +# rm /tmp/IBM_Plex_Mono.zip +# RUN fc-cache -f -v # Set up a new user named "user" with user ID 1000 RUN useradd -m -u 1000 user diff --git a/gui/plots.py b/gui/plots.py index f133bf5a1..fc71fb377 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -1,18 +1,16 @@ -import logging - import numpy as np import pandas as pd from matplotlib import pyplot as plt plt.ioff() -plt.rcParams["font.family"] = [ - "IBM Plex Mono", - # Fallback fonts: - "DejaVu Sans Mono", - "Courier New", - "monospace", -] -logging.getLogger("matplotlib.font_manager").disabled = True +plt.rcParams["font.family"] = "monospace" +# plt.rcParams["font.family"] = [ +# "IBM Plex Mono", +# # Fallback fonts: +# "DejaVu Sans Mono", +# "Courier New", +# "monospace", +# ] from data import generate_data From 046f6d6d2f3b9b4c40d44c1682fd408e77793d6f Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 15 Jul 2024 22:36:54 +0100 Subject: [PATCH 136/136] fix(gui): use Agg backend for plots --- gui/plots.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gui/plots.py b/gui/plots.py index fc71fb377..21f97ead1 100644 --- a/gui/plots.py +++ b/gui/plots.py @@ -1,3 +1,7 @@ +import matplotlib + +matplotlib.use("agg") + import numpy as np import pandas as pd from matplotlib import pyplot as plt