Commit f92b56a

temp update on model-server
Signed-off-by: Sunyanan Choochotkaew <[email protected]>
sunya-ch committed Mar 29, 2024
1 parent 0f6f131 commit f92b56a
Showing 39 changed files with 731 additions and 185 deletions.
53 changes: 37 additions & 16 deletions .github/workflows/integration-test.yml
@@ -65,27 +65,48 @@ jobs:
           curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
           chmod +x kustomize
           mv kustomize /usr/local/bin/
-      - name: test deploying kepler with only estimator
+      # - name: test deploying kepler with only estimator
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR"
+      # - name: test deploying kepler with only estimator when kepler-model-db is available
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR DB"
+      # - name: test deploying kepler with only server
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "SERVER"
+      # - name: test deploying kepler with only server when kepler-model-db is available
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "SERVER DB"
+      # - name: test deploying kepler with estimator and model server
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR SERVER"
+      - name: test deploying kepler with estimator and model server when kepler-model-db is available
         run: |
           make deploy
           make e2e-test
           make cleanup
         env:
-          OPTS: "ESTIMATOR"
-      - name: test deploying kepler with only server
-        run: |
-          make deploy
-          make e2e-test
-          make cleanup
-        env:
-          OPTS: "SERVER"
-      - name: test deploying kepler with estimator and model server
-        run: |
-          make deploy
-          make e2e-test
-          make cleanup
-        env:
-          OPTS: "ESTIMATOR SERVER"
+          OPTS: "ESTIMATOR SERVER DB"
       - name: test deploying dummy kepler with only estimator
         run: |
           make deploy
39 changes: 24 additions & 15 deletions .github/workflows/pr.yml
@@ -7,6 +7,15 @@
 name: Workflow on Push/PR
 
 on:
+  push:
+    paths-ignore:
+      - 'fig/**'
+      - '.github/ISSUE_TEMPLATE/**'
+      - '.vscode/**'
+      - 'LICENSE'
+      - '.gitignore'
+      - '*.md'
+      - '**/*.md'
   pull_request:
     paths-ignore:
       - 'fig/**'
@@ -112,11 +121,11 @@ jobs:
             echo "exists=false" >> "$GITHUB_OUTPUT"
           fi
-  unit-test:
-    needs: [check-change]
-    uses: ./.github/workflows/unit-test.yml
-    with:
-      base_change: ${{ needs.check-change.outputs.base }}
+  # unit-test:
+  #   needs: [check-change]
+  #   uses: ./.github/workflows/unit-test.yml
+  #   with:
+  #     base_change: ${{ needs.check-change.outputs.base }}
 
   base-image:
     if: ${{ (needs.check-secret.outputs.docker-secret == 'true') && ((needs.check-base-exist.outputs.exists == 'false') || (needs.check-change.outputs.base == 'true')) }}
@@ -154,16 +163,16 @@ jobs:
         run: |
           echo "change=true" >> "$GITHUB_OUTPUT"
-  tekton-test:
-    needs: [check-secret, check-branch, check-change, base-image]
-    if: always()
-    uses: ./.github/workflows/tekton-test.yml
-    with:
-      base_change: ${{ needs.check-change.outputs.base }}
-      docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
-      image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
-      image_tag: ${{ needs.check-branch.outputs.tag }}
-      pipeline_name: std_v0.7
+  # tekton-test:
+  #   needs: [check-secret, check-branch, check-change, base-image]
+  #   if: always()
+  #   uses: ./.github/workflows/tekton-test.yml
+  #   with:
+  #     base_change: ${{ needs.check-change.outputs.base }}
+  #     docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
+  #     image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
+  #     image_tag: ${{ needs.check-branch.outputs.tag }}
+  #     pipeline_name: std_v0.7
 
   integration-test:
     needs: [check-secret, check-branch, check-change, base-image]
1 change: 1 addition & 0 deletions .github/workflows/unit-test.yml
@@ -32,6 +32,7 @@ jobs:
         run: make test-pipeline
       - name: Test model server
         run: make test-model-server
+        timeout-minutes: 5
       - name: Test estimator
         run: make test-estimator
         timeout-minutes: 5
4 changes: 2 additions & 2 deletions Makefile
@@ -48,8 +48,8 @@ test-estimator: run-estimator run-collector-client clean-estimator
 
 # test estimator --> model-server
 run-model-server:
-	$(CTR_CMD) run -d --platform linux/amd64 -e "MODEL_TOPURL=http://localhost:8110" -v ${MODEL_PATH}:/mnt/models -p 8100:8100 --name model-server $(TEST_IMAGE) /bin/bash -c "python3.8 tests/http_server.py & sleep 5 && python3.8 src/server/model_server.py"
-	sleep 5
+	$(CTR_CMD) run -d --platform linux/amd64 -e "MODEL_TOPURL=http://localhost:8110" -v ${MODEL_PATH}:/mnt/models -p 8100:8100 --name model-server $(TEST_IMAGE) /bin/bash -c "python3.8 tests/http_server.py & sleep 10 && python3.8 src/server/model_server.py"
+	while ! docker logs model-server | grep -q Serving; do echo "waiting for model-server to serve"; sleep 5; done
 
 run-estimator-client:
 	$(CTR_CMD) exec model-server /bin/bash -c "python3.8 -u ./tests/estimator_model_request_test.py"
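
The rewritten `run-model-server` target above replaces the fixed `sleep 5` with a readiness poll: it re-reads `docker logs` until the model server prints its `Serving` line, so the follow-up test no longer races container startup. The Make loop itself has no upper bound; in CI it is capped by the `timeout-minutes: 5` added to the unit-test workflow above. Below is a minimal Python sketch of the same check with an explicit deadline; the function name and the 5-minute limit are illustrative, not part of this commit.

```python
import subprocess
import time

def wait_for_log_line(container: str, sentinel: str, deadline_s: float = 300.0) -> None:
    """Poll `docker logs` until `sentinel` appears, or fail after `deadline_s` seconds."""
    start = time.monotonic()
    while time.monotonic() - start < deadline_s:
        result = subprocess.run(
            ["docker", "logs", container],
            capture_output=True, text=True, check=False,
        )
        # The server may log to stdout or stderr depending on its logger setup.
        if sentinel in result.stdout or sentinel in result.stderr:
            return
        print(f"waiting for {container} to serve")
        time.sleep(5)
    raise TimeoutError(f"{container} did not log {sentinel!r} within {deadline_s}s")

wait_for_log_line("model-server", "Serving")
```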
2 changes: 1 addition & 1 deletion README.md
@@ -75,7 +75,7 @@ Compatible version: python 3.8
 |Test case|Command|
 |---|---|
 |[Training pipeline](./tests/README.md#pipeline)|python -u ./tests/pipeline_test.py|
-|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
+|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: export MODEL_PATH=$(pwd)/tests/models;python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
 |[Estimator](./tests/README.md#estimator-power-request-from-collector)|Terminal 1: python src/estimate/estimator.py<br>Terminal 2: python -u tests/estimator_power_request_test.py|
 |[Offline Trainer](./tests/README.md#offline-trainer)|Terminal 1: python src/train/offline_trainer.py<br>Terminal 2: python -u tests/offline_trainer_test.py|
2 changes: 1 addition & 1 deletion cmd/README.md
@@ -85,7 +85,7 @@ Use kepler model server function as a standalone docker container.
 5.3. Plot prediction result on specific trainer model and feature group (`estimate`)
 
 ```bash
-docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_1 --feature-group BPFOnly
+docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_0 --feature-group BPFOnly
 ```
 
 5.4. Plot prediction error comparison among feature group and trainer model (`error`)
111 changes: 111 additions & 0 deletions cmd/cmd_plot.py
@@ -9,6 +9,11 @@
 from util.prom_types import TIMESTAMP_COL
 from util import PowerSourceMap
 
+from util.train_types import FeatureGroup, ModelOutputType, weight_support_trainers
+from util.loader import load_metadata, load_scaler, get_model_group_path
+from train.profiler.node_type_index import NodeTypeIndexCollection
+from estimate import load_model
+markers = ['o', 's', '^', 'v', '<', '>', 'p', 'P', '*', 'x', '+', '|', '_']
 
 def ts_plot(data, cols, title, output_folder, name, labels=None, subtitles=None, ylabel=None):
     plot_height = 3
@@ -147,4 +152,110 @@ def metadata_plot(args, energy_source, metadata_df, output_folder, name):
     plt.legend(frameon=False)
     filename = os.path.join(output_folder, name + ".png")
     fig.savefig(filename)
     plt.close()
+
+def power_curve_plot(args, data_path, energy_source, output_folder, name):
+    model_toppath = data_path
+    pipeline_name = args.pipeline_name
+    pipeline_path = os.path.join(model_toppath, pipeline_name)
+    node_collection = NodeTypeIndexCollection(pipeline_path)
+    all_node_types = sorted(list(node_collection.node_type_index.keys()))
+    output_type = ModelOutputType[args.output_type]
+    models, _, cpu_ms_max = _load_all_models(model_toppath=model_toppath, output_type=output_type, name=pipeline_name, node_types=all_node_types, energy_source=energy_source)
+    if len(models) > 0:
+        _plot_models(models, cpu_ms_max, energy_source, output_folder, name)
+
+def _get_model(model_toppath, trainer, model_node_type, output_type, name, energy_source):
+    feature_group = FeatureGroup.BPFOnly
+    model_name = "{}_{}".format(trainer, model_node_type)
+    group_path = get_model_group_path(model_toppath, output_type, feature_group, energy_source, name)
+    model_path = os.path.join(group_path, model_name)
+    model = load_model(model_path)
+    metadata = load_metadata(model_path)
+    if metadata is None:
+        return model, None, None
+    scaler = load_scaler(model_path)
+    cpu_ms_max = scaler.max_abs_[0]
+    return model, metadata, cpu_ms_max
+
+def _load_all_models(model_toppath, output_type, name, node_types, energy_source):
+    models_dict = dict()
+    metadata_dict = dict()
+    cpu_ms_max_dict = dict()
+    for model_node_type in node_types:
+        min_mae = None
+        for trainer in weight_support_trainers:
+            model, metadata, cpu_ms_max = _get_model(model_toppath, trainer, model_node_type, output_type=output_type, name=name, energy_source=energy_source)
+            if metadata is None:
+                continue
+            cpu_ms_max_dict[model_node_type] = cpu_ms_max
+            if min_mae is None or min_mae > metadata["mae"]:
+                min_mae = metadata["mae"]
+                models_dict[model_node_type], metadata_dict[model_node_type] = model, metadata
+    return models_dict, metadata_dict, cpu_ms_max_dict
+
+def _plot_models(models, cpu_ms_max, energy_source, output_folder, name, max_plot=15, cpu_time_bin_num=10, sample_num=20):
+    from util.train_types import BPF_FEATURES
+    import numpy as np
+    import pandas as pd
+    import seaborn as sns
+    sns.set_palette("Paired")
+
+    import matplotlib.pyplot as plt
+
+    main_feature_col = BPF_FEATURES[0]
+    predicted_col = {
+        "acpi": "default_platform_power",
+        "intel_rapl": "default_package_power"
+    }
+
+    num_bins = len(cpu_ms_max)//cpu_time_bin_num + 1
+    nobin = False
+    if num_bins == 1:
+        nobin = True
+    values = np.array(list(cpu_ms_max.values()))
+    _, bins = np.histogram(values, bins=num_bins)
+    bin_size = len(bins) + 1 if not nobin else 1
+    data_with_prediction_list = [[] for _ in range(bin_size)]
+
+    num_cols = min(3, bin_size)
+
+    for node_type, model in models.items():
+        # generate data from scaler
+        xs = np.column_stack((np.linspace(0, cpu_ms_max[node_type], sample_num), np.zeros(sample_num)))
+        data = pd.DataFrame(xs, columns=models[node_type].estimator.features)
+        _, data_with_prediction = model.append_prediction(data)
+        if nobin:
+            bin_index = 0
+        else:
+            bin_index = np.digitize([cpu_ms_max[node_type]], bins)[0]
+        data_with_prediction_list[bin_index] += [(node_type, data_with_prediction)]
+    total_graphs = 0
+    for data_with_predictions in data_with_prediction_list:
+        total_graphs += int(np.ceil(len(data_with_predictions) / max_plot))
+    num_rows = int(np.ceil(total_graphs/num_cols))
+
+    fig, axes = plt.subplots(num_rows, num_cols, figsize=(int(6*num_cols), int(5*num_rows)))
+    axes_index = 0
+    for data_with_predictions in data_with_prediction_list:
+        index = 0
+        for data_with_prediction_index in data_with_predictions:
+            if num_rows == 1 and num_cols == 1:
+                ax = axes
+            else:
+                ax = axes[axes_index//num_cols][axes_index%num_cols]
+            node_type = data_with_prediction_index[0]
+            data_with_prediction = data_with_prediction_index[1]
+            sns.lineplot(data=data_with_prediction, x=main_feature_col, y=predicted_col[energy_source], label="type={}".format(node_type), marker=markers[index], ax=ax)
+            index += 1
+            index = index % len(markers)
+            if index % max_plot == 0:
+                ax.set_ylabel("Predicted power (W)")
+                axes_index += 1
+        if len(data_with_predictions) > 0:
+            ax.set_ylabel("Predicted power (W)")
+            axes_index += 1
+    filename = os.path.join(output_folder, name + ".png")
+    plt.tight_layout()
+    fig.savefig(filename)
+    plt.close()
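
The new `power_curve_plot` walks the pipeline's node-type index, picks for each node type the weight-supported trainer with the lowest MAE, sweeps the `BPFOnly` CPU-time feature from 0 up to the scaler's observed maximum (`scaler.max_abs_[0]`), and draws one predicted-power curve per node type, grouped into subplots by `np.digitize` over histogram edges of the per-type maxima (hence `bin_size = len(bins) + 1`, since `digitize` can return indices 0 through `len(bins)`). A minimal sketch of calling it directly follows; the import path and all argument values (`std_v0.7`, `AbsPower`, `/data`) are illustrative assumptions, not taken from this commit.

```python
# Hypothetical driver for power_curve_plot; paths and names are placeholders.
from types import SimpleNamespace

from cmd_plot import power_curve_plot  # assumes cmd/ is on PYTHONPATH

args = SimpleNamespace(
    pipeline_name="std_v0.7",  # pipeline directory under data_path
    output_type="AbsPower",    # resolved via ModelOutputType[args.output_type]
)
power_curve_plot(
    args,
    data_path="/data",            # model top path holding the pipeline
    energy_source="intel_rapl",   # plots the "default_package_power" column
    output_folder="/data",
    name="power_curve",           # figure is saved as power_curve.png
)
```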
(Diffs for the remaining 32 changed files are not shown here.)
