Improve saving, presenting, and rendering idioms (#24)

Improve saving, presenting, and rendering idioms
manishshettym · Feb 23, 2024 · 733b56c · 733b56c
2 parents 9914b8f + 9a80f84
commit 733b56c
Show file tree

Hide file tree

Showing 13 changed files with 555 additions and 215 deletions.
diff --git a/README.md b/README.md
@@ -58,74 +58,96 @@ How to train CodeScholar:
 Refer to the [training README](./codescholar/representation/README.md) for a detailed description of how to train CodeScholar.
 
 
-How to run pre-trained CodeScholar:
+How to use CodeScholar:
 -----------------------
-
-```bash
-# start an elasticsearch server (hosts programs)
-docker run --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:8.7.0
-```
-
-```bash
-# start a redis server (hosts embeddings)
-docker run --rm -p 6379:6379 redis
-```
-
-```bash
-# index the dataset using /search/elastic_search.py
-cd codescholar/search
-python elastic_search.py --dataset <dataset_name>
-```
-
-> TODO: index all embeddings into redis; currently index happens before each search
 
-```bash
-# run the codescholar query (say np.mean) using /search/search.py
-python search.py --dataset <dataset_name> --seed np.mean
-```
+1. Starting services
+    ```bash
+    ./services.sh start
+    ```
+    <details>
+        <summary>what does this do?</summary>
 
-You can also use some arguments with the search query:
-```bash
---min_idiom_size <int> # minimum size of idioms to be saved
---max_idiom_size <int> # maximum size of idioms to be saved
---max_init_beams <int> # maximum beams to initialize search
---stop_at_equilibrium  # stop search when diversity = reusability of idioms
-```
-*note: see more configurations in [/search/search_config.py](./codescholar/search/search_config.py)*
+    ```bash
+    # start an elasticsearch server (hosts programs) in a tmux session
+    docker run --rm -p 9200:9200 -p 9300:9300 -e "xpack.security.enabled=false" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:8.7.0
+    
+    # start a redis server (hosts embeddings)
+    docker run --rm -p 6379:6379 redis
+    ```
+    </details>
+
+2. Indexing
+    ```bash
+    ./services.sh index <dataset_name>
+    ```
+    <details>
+        <summary>what does this do?</summary>
+
+    ```bash
+    # index the dataset using /search/elastic_search.py
+    cd codescholar/search
+    python elastic_search.py --dataset <dataset_name>
+    ```
+
+    > TODO: index all embeddings into redis; currently index happens before each search
+    </details>
+
+3. Searching
+    ```bash
+    # from codescholar/search, run a codescholar query (say np.mean) using search.py
+    python search.py --dataset <dataset_name> --seed np.mean
+    ```
+
+    You can also use some arguments with the search query:
+    ```bash
+    --min_idiom_size <int> # minimum size of idioms to be saved
+    --max_idiom_size <int> # maximum size of idioms to be saved
+    --max_init_beams <int> # maximum beams to initialize search
+    --stop_at_equilibrium  # stop search when diversity = reusability of idioms
+    ```
+    *note: see more configurations in [/search/search_config.py](./codescholar/search/search_config.py)*
 
 How to run CodeScholar Streamlit App:
 ---------------------------
 
-```bash
-# cd into the apps directory
-cd codescholar/apps
-```
-
-```bash
-# start a redis server to act as the message broker
-docker run --rm -p 6379:6379 redis
-```
-
-```bash
-# start a celery backend to handle tasks asynchronously
-celery -A app_decl.celery worker --pool=solo --loglevel=info
-```
-
-```bash
-# start a flask server to handle http API requests
-# note: runs flask on port 3003
-python app_main.py
-```
-
-You can now make API requests to the flask server. For example, to run search for size `10` idioms for `pd.merge`, you can:
-```bash
-curl -X POST -H "Content-Type: application/json" -d '{"api": "pd.merge", "size": 10}' http://localhost:3003/search
-```
-
-```bash
-# start the streamlit app on port localhost:8501
-streamlit run app_streamlit.py
-```
+1. Set up services
+    ```bash
+    ./services.sh start
+    ./services.sh index <dataset_name>
+    ```
+
+2. Start server and application
+    ```bash
+    cd codescholar/apps
+
+    ./app.sh start
+    ```
+    <details>
+        <summary>what does this do?</summary>
+
+    ```bash
+    # start a celery backend to handle tasks asynchronously
+    celery -A app_decl.celery worker --pool=solo --loglevel=info
+
+    # start a flask server to handle http API requests
+    # note: runs flask on port 3003
+    python flask_app.py
+    ```
+
+    > You can now make API requests to the Flask server. For example, to search for idioms of size `10` for `pd.merge`, run:
+    ```bash
+    curl -X POST -H "Content-Type: application/json" -d '{"api": "pd.merge", "size": 10}' http://localhost:3003/search
+    ```
+
+    Finally,
+    ```bash
+    # start the streamlit app on port localhost:8501
+    streamlit run streamlit_app.py
+    ```
+    </details>
+
+    View details about the app using: `./app.sh show`
 
 Reproducibility of CodeScholar Evaluation:
 ---------------------------

diff --git a/codescholar/apps/app.sh b/codescholar/apps/app.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Function to check if a tmux session exists
+tmux_session_exists() {
+    tmux has-session -t "$1" 2>/dev/null
+}
+
+# Launch the Celery worker in a detached tmux session named "celery",
+# unless a session with that name is already present.
+start_celery() {
+    if tmux_session_exists "celery"; then
+        echo "Celery tmux session already exists."
+        return
+    fi
+
+    echo "Starting Celery worker in a tmux session..."
+    tmux new-session -d -s "celery" celery -A app_decl.celery worker --pool=solo --loglevel=info
+}
+
+# Launch the Flask server in a detached tmux session named "flask",
+# unless a session with that name is already present.
+start_flask() {
+    if tmux_session_exists "flask"; then
+        echo "Flask tmux session already exists."
+        return
+    fi
+
+    echo "Starting Flask server in a tmux session..."
+    tmux new-session -d -s "flask" python flask_app.py
+}
+
+# Launch the Streamlit app in a detached tmux session named "streamlit",
+# unless a session with that name is already present.
+start_streamlit() {
+    if tmux_session_exists "streamlit"; then
+        echo "Streamlit tmux session already exists."
+        return
+    fi
+
+    echo "Starting Streamlit app..."
+    tmux new-session -d -s "streamlit" streamlit run streamlit_app.py
+}
+
+# Stop the service running in the tmux session named "$1".
+# Sends Ctrl-C so the process can shut down cleanly, waits up to 5 seconds
+# for the session to close on its own, and only then kills the session.
+stop_service() {
+    if ! tmux_session_exists "$1"; then
+        echo "No tmux session for $1 found."
+        return
+    fi
+
+    echo "Stopping $1..."
+    tmux send-keys -t "$1" C-c
+
+    # Killing the session right after Ctrl-C would cut the shutdown short, so
+    # poll briefly. With tmux's default settings the session disappears by
+    # itself once its command exits, in which case there is nothing to kill.
+    local attempt
+    for attempt in 1 2 3 4 5; do
+        tmux_session_exists "$1" || return 0
+        sleep 1
+    done
+
+    tmux kill-session -t "$1"
+}
+
+# Print the current contents of the "streamlit" tmux pane, or a notice when
+# no such session exists.
+show_streamlit_status() {
+    if ! tmux_session_exists "streamlit"; then
+        echo "No streamlit session found."
+        return
+    fi
+
+    echo "Streamlit session status:"
+    tmux capture-pane -p -t "streamlit"
+}
+
+# Dispatch on the first argument: start, stop, or show.
+case "$1" in
+    start)
+        start_celery
+        start_flask
+        start_streamlit
+        ;;
+    stop)
+        stop_service "celery"
+        stop_service "flask"
+        stop_service "streamlit"
+        ;;
+    show)
+        show_streamlit_status
+        ;;
+    *)
+        echo "Usage: $0 {start|stop|show}"
+        exit 1
+        ;;
+esac
diff --git a/codescholar/apps/app_bench.json b/codescholar/apps/app_bench.json
@@ -1,10 +1,5 @@
 {
-    "pandas": [
-        "pd.concat",
-        "df.groupby"
-    ],
     "numpy": [
-        "np.mean",
-        "np.dot"
+        "np.mean"
     ]
 }
diff --git a/codescholar/apps/app_decl.py b/codescholar/apps/app_decl.py
@@ -80,16 +80,13 @@ def search():
         # search config
         args.mode = "q"
         args.seed = api
-        args.min_idiom_size = 2
+        args.min_idiom_size = 3
         args.max_idiom_size = 20
-        args.max_init_beams = 150
+        args.max_init_beams = 200
         args.result_dir = f"{api_cache_dir}/{args.seed}/"
         args.idiom_g_dir = f"{args.result_dir}/idioms/graphs/"
         args.idiom_p_dir = f"{args.result_dir}/idioms/progs/"
 
-        if not osp.exists(args.idiom_g_dir):
-            os.makedirs(args.idiom_g_dir)
-
         if not osp.exists(args.idiom_p_dir):
             os.makedirs(args.idiom_p_dir)
 
@@ -99,9 +96,22 @@ def search():
         # search_main(args)
 
         return flask.jsonify(
-            {
-                "status": "CodeScholar is now growing idioms for this API. Please try again in ~2 mins."
-            }
+            {"status": "CodeScholar is now growing idioms for this API."}
+        )
+
+
+@scholarapp.route("/search_status", methods=["GET"])
+def search_status():
+    api = flask.request.args.get("api")
+    # Check if the idioms for the API are ready
+    # This could involve checking a file, database, or cache status
+    idioms_dir = osp.join(api_cache_dir, api, "idioms", "progs")
+    idioms_ready = osp.exists(idioms_dir) and len(os.listdir(idioms_dir)) > 0
+    if idioms_ready:
+        return flask.jsonify({"status": "ready"})
+    else:
+        return flask.jsonify(
+            {"status": "CodeScholar is now growing idioms for this API."}
         )
 
 

diff --git a/codescholar/apps/app_main.py → codescholar/apps/flask_app.py b/codescholar/apps/app_main.py → codescholar/apps/flask_app.py
@@ -3,4 +3,4 @@
 
 if __name__ == "__main__":
     torch.multiprocessing.set_start_method("spawn")
-    scholarapp.run(host="0.0.0.0", port=3003)
+    scholarapp.run(host="0.0.0.0", port=3003, debug=True)