Commit f92b56a

temp update on model-server
Signed-off-by: Sunyanan Choochotkaew <[email protected]>
sunya-ch committed Mar 29, 2024
1 parent 0f6f131 commit f92b56a
Showing 39 changed files with 731 additions and 185 deletions.
53 changes: 37 additions & 16 deletions .github/workflows/integration-test.yml
@@ -65,27 +65,48 @@ jobs:
           curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
           chmod +x kustomize
           mv kustomize /usr/local/bin/
-      - name: test deploying kepler with only estimator
+      # - name: test deploying kepler with only estimator
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR"
+      # - name: test deploying kepler with only estimator when kepler-model-db is available
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR DB"
+      # - name: test deploying kepler with only server
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "SERVER"
+      # - name: test deploying kepler with only server when kepler-model-db is available
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "SERVER DB"
+      # - name: test deploying kepler with estimator and model server
+      #   run: |
+      #     make deploy
+      #     make e2e-test
+      #     make cleanup
+      #   env:
+      #     OPTS: "ESTIMATOR SERVER"
+      - name: test deploying kepler with estimator and model server when kepler-model-db is available
         run: |
           make deploy
           make e2e-test
           make cleanup
         env:
-          OPTS: "ESTIMATOR"
-      - name: test deploying kepler with only server
-        run: |
-          make deploy
-          make e2e-test
-          make cleanup
-        env:
-          OPTS: "SERVER"
-      - name: test deploying kepler with estimator and model server
-        run: |
-          make deploy
-          make e2e-test
-          make cleanup
-        env:
-          OPTS: "ESTIMATOR SERVER"
+          OPTS: "ESTIMATOR SERVER DB"
       - name: test deploying dummy kepler with only estimator
         run: |
           make deploy
39 changes: 24 additions & 15 deletions .github/workflows/pr.yml
@@ -7,6 +7,15 @@
 name: Workflow on Push/PR
 
 on:
+  push:
+    paths-ignore:
+      - 'fig/**'
+      - '.github/ISSUE_TEMPLATE/**'
+      - '.vscode/**'
+      - 'LICENSE'
+      - '.gitignore'
+      - '*.md'
+      - '**/*.md'
   pull_request:
     paths-ignore:
       - 'fig/**'
@@ -112,11 +121,11 @@ jobs:
             echo "exists=false" >> "$GITHUB_OUTPUT"
           fi
-  unit-test:
-    needs: [check-change]
-    uses: ./.github/workflows/unit-test.yml
-    with:
-      base_change: ${{ needs.check-change.outputs.base }}
+  # unit-test:
+  #   needs: [check-change]
+  #   uses: ./.github/workflows/unit-test.yml
+  #   with:
+  #     base_change: ${{ needs.check-change.outputs.base }}
 
   base-image:
     if: ${{ (needs.check-secret.outputs.docker-secret == 'true') && ((needs.check-base-exist.outputs.exists == 'false') || (needs.check-change.outputs.base == 'true')) }}
@@ -154,16 +163,16 @@ jobs:
         run: |
           echo "change=true" >> "$GITHUB_OUTPUT"
-  tekton-test:
-    needs: [check-secret, check-branch, check-change, base-image]
-    if: always()
-    uses: ./.github/workflows/tekton-test.yml
-    with:
-      base_change: ${{ needs.check-change.outputs.base }}
-      docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
-      image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
-      image_tag: ${{ needs.check-branch.outputs.tag }}
-      pipeline_name: std_v0.7
+  # tekton-test:
+  #   needs: [check-secret, check-branch, check-change, base-image]
+  #   if: always()
+  #   uses: ./.github/workflows/tekton-test.yml
+  #   with:
+  #     base_change: ${{ needs.check-change.outputs.base }}
+  #     docker_secret: ${{ needs.check-secret.outputs.docker-secret }}
+  #     image_repo: ${{ vars.IMAGE_REPO || 'docker.io/library' }}
+  #     image_tag: ${{ needs.check-branch.outputs.tag }}
+  #     pipeline_name: std_v0.7
 
   integration-test:
     needs: [check-secret, check-branch, check-change, base-image]
1 change: 1 addition & 0 deletions .github/workflows/unit-test.yml
@@ -32,6 +32,7 @@ jobs:
         run: make test-pipeline
       - name: Test model server
         run: make test-model-server
+        timeout-minutes: 5
       - name: Test estimator
         run: make test-estimator
         timeout-minutes: 5
4 changes: 2 additions & 2 deletions Makefile
@@ -48,8 +48,8 @@ test-estimator: run-estimator run-collector-client clean-estimator
 
 # test estimator --> model-server
 run-model-server:
-	$(CTR_CMD) run -d --platform linux/amd64 -e "MODEL_TOPURL=http://localhost:8110" -v ${MODEL_PATH}:/mnt/models -p 8100:8100 --name model-server $(TEST_IMAGE) /bin/bash -c "python3.8 tests/http_server.py & sleep 5 && python3.8 src/server/model_server.py"
-	sleep 5
+	$(CTR_CMD) run -d --platform linux/amd64 -e "MODEL_TOPURL=http://localhost:8110" -v ${MODEL_PATH}:/mnt/models -p 8100:8100 --name model-server $(TEST_IMAGE) /bin/bash -c "python3.8 tests/http_server.py & sleep 10 && python3.8 src/server/model_server.py"
+	while ! docker logs model-server | grep -q Serving; do echo "waiting for model-server to serve"; sleep 5; done
 
 run-estimator-client:
 	$(CTR_CMD) exec model-server /bin/bash -c "python3.8 -u ./tests/estimator_model_request_test.py"
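
The rewritten `run-model-server` target above replaces the fixed `sleep 5` with a readiness poll: it re-reads `docker logs` until the model server prints its `Serving` line, so the follow-up test no longer races container startup. The Make loop itself has no upper bound; in CI it is capped by the `timeout-minutes: 5` added to the unit-test workflow above. Below is a minimal Python sketch of the same check with an explicit deadline; the function name and the 5-minute limit are illustrative, not part of this commit.

```python
import subprocess
import time

def wait_for_log_line(container: str, sentinel: str, deadline_s: float = 300.0) -> None:
    """Poll `docker logs` until `sentinel` appears, or fail after `deadline_s` seconds."""
    start = time.monotonic()
    while time.monotonic() - start < deadline_s:
        result = subprocess.run(
            ["docker", "logs", container],
            capture_output=True, text=True, check=False,
        )
        # The server may log to stdout or stderr depending on its logger setup.
        if sentinel in result.stdout or sentinel in result.stderr:
            return
        print(f"waiting for {container} to serve")
        time.sleep(5)
    raise TimeoutError(f"{container} did not log {sentinel!r} within {deadline_s}s")

wait_for_log_line("model-server", "Serving")
```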
2 changes: 1 addition & 1 deletion README.md
@@ -75,7 +75,7 @@ Compatible version: python 3.8
 |Test case|Command|
 |---|---|
 |[Training pipeline](./tests/README.md#pipeline)|python -u ./tests/pipeline_test.py|
-|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
+|[Model server](./tests/README.md#estimator-model-request-to-model-server)|Terminal 1: export MODEL_PATH=$(pwd)/tests/models;python src/server/model_server.py <br>Terminal 2: python -u tests/estimator_model_request_test.py|
 |[Estimator](./tests/README.md#estimator-power-request-from-collector)|Terminal 1: python src/estimate/estimator.py<br>Terminal 2: python -u tests/estimator_power_request_test.py|
 |[Offline Trainer](./tests/README.md#offline-trainer)|Terminal 1: python src/train/offline_trainer.py<br>Terminal 2: python -u tests/offline_trainer_test.py|
2 changes: 1 addition & 1 deletion cmd/README.md
@@ -85,7 +85,7 @@ Use kepler model server function as a standalone docker container.
 5.3. Plot prediction result on specific trainer model and feature group (`estimate`)
 
 ```bash
-docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_1 --feature-group BPFOnly
+docker run --rm -v "$(pwd)/data":/data quay.io/sustainable_computing_io/kepler_model_server:v0.7 plot --target-data estimate -i output_kepler_query --model-name GradientBoostingRegressorTrainer_0 --feature-group BPFOnly
 ```
 
 5.4. Plot prediction error comparison among feature group and trainer model (`error`)
111 changes: 111 additions & 0 deletions cmd/cmd_plot.py
@@ -9,6 +9,11 @@
 from util.prom_types import TIMESTAMP_COL
 from util import PowerSourceMap
 
+from util.train_types import FeatureGroup, ModelOutputType, weight_support_trainers
+from util.loader import load_metadata, load_scaler, get_model_group_path
+from train.profiler.node_type_index import NodeTypeIndexCollection
+from estimate import load_model
+markers = ['o', 's', '^', 'v', '<', '>', 'p', 'P', '*', 'x', '+', '|', '_']
 
 def ts_plot(data, cols, title, output_folder, name, labels=None, subtitles=None, ylabel=None):
     plot_height = 3
@@ -147,4 +152,110 @@ def metadata_plot(args, energy_source, metadata_df, output_folder, name):
     plt.legend(frameon=False)
     filename = os.path.join(output_folder, name + ".png")
     fig.savefig(filename)
     plt.close()
+
+def power_curve_plot(args, data_path, energy_source, output_folder, name):
+    model_toppath = data_path
+    pipeline_name = args.pipeline_name
+    pipeline_path = os.path.join(model_toppath, pipeline_name)
+    node_collection = NodeTypeIndexCollection(pipeline_path)
+    all_node_types = sorted(list(node_collection.node_type_index.keys()))
+    output_type = ModelOutputType[args.output_type]
+    models, _, cpu_ms_max = _load_all_models(model_toppath=model_toppath, output_type=output_type, name=pipeline_name, node_types=all_node_types, energy_source=energy_source)
+    if len(models) > 0:
+        _plot_models(models, cpu_ms_max, energy_source, output_folder, name)
+
+def _get_model(model_toppath, trainer, model_node_type, output_type, name, energy_source):
+    feature_group = FeatureGroup.BPFOnly
+    model_name = "{}_{}".format(trainer, model_node_type)
+    group_path = get_model_group_path(model_toppath, output_type, feature_group, energy_source, name)
+    model_path = os.path.join(group_path, model_name)
+    model = load_model(model_path)
+    metadata = load_metadata(model_path)
+    if metadata is None:
+        return model, None, None
+    scaler = load_scaler(model_path)
+    cpu_ms_max = scaler.max_abs_[0]
+    return model, metadata, cpu_ms_max
+
+def _load_all_models(model_toppath, output_type, name, node_types, energy_source):
+    models_dict = dict()
+    metadata_dict = dict()
+    cpu_ms_max_dict = dict()
+    for model_node_type in node_types:
+        min_mae = None
+        for trainer in weight_support_trainers:
+            model, metadata, cpu_ms_max = _get_model(model_toppath, trainer, model_node_type, output_type=output_type, name=name, energy_source=energy_source)
+            if metadata is None:
+                continue
+            cpu_ms_max_dict[model_node_type] = cpu_ms_max
+            if min_mae is None or min_mae > metadata["mae"]:
+                min_mae = metadata["mae"]
+                models_dict[model_node_type], metadata_dict[model_node_type] = model, metadata
+    return models_dict, metadata_dict, cpu_ms_max_dict
+
+def _plot_models(models, cpu_ms_max, energy_source, output_folder, name, max_plot=15, cpu_time_bin_num=10, sample_num=20):
+    from util.train_types import BPF_FEATURES
+    import numpy as np
+    import pandas as pd
+    import seaborn as sns
+    sns.set_palette("Paired")
+
+    import matplotlib.pyplot as plt
+
+    main_feature_col = BPF_FEATURES[0]
+    predicted_col = {
+        "acpi": "default_platform_power",
+        "intel_rapl": "default_package_power"
+    }
+
+    num_bins = len(cpu_ms_max)//cpu_time_bin_num + 1
+    nobin = False
+    if num_bins == 1:
+        nobin = True
+    values = np.array(list(cpu_ms_max.values()))
+    _, bins = np.histogram(values, bins=num_bins)
+    bin_size = len(bins) + 1 if not nobin else 1
+    data_with_prediction_list = [[] for _ in range(bin_size)]
+
+    num_cols = min(3, bin_size)
+
+    for node_type, model in models.items():
+        # generate data from scaler
+        xs = np.column_stack((np.linspace(0, cpu_ms_max[node_type], sample_num), np.zeros(sample_num)))
+        data = pd.DataFrame(xs, columns=models[node_type].estimator.features)
+        _, data_with_prediction = model.append_prediction(data)
+        if nobin:
+            bin_index = 0
+        else:
+            bin_index = np.digitize([cpu_ms_max[node_type]], bins)[0]
+        data_with_prediction_list[bin_index] += [(node_type, data_with_prediction)]
+    total_graphs = 0
+    for data_with_predictions in data_with_prediction_list:
+        total_graphs += int(np.ceil(len(data_with_predictions) / max_plot))
+    num_rows = int(np.ceil(total_graphs/num_cols))
+
+    fig, axes = plt.subplots(num_rows, num_cols, figsize=(int(6*num_cols), int(5*num_rows)))
+    axes_index = 0
+    for data_with_predictions in data_with_prediction_list:
+        index = 0
+        for data_with_prediction_index in data_with_predictions:
+            if num_rows == 1 and num_cols == 1:
+                ax = axes
+            else:
+                ax = axes[axes_index//num_cols][axes_index%num_cols]
+            node_type = data_with_prediction_index[0]
+            data_with_prediction = data_with_prediction_index[1]
+            sns.lineplot(data=data_with_prediction, x=main_feature_col, y=predicted_col[energy_source], label="type={}".format(node_type), marker=markers[index], ax=ax)
+            index += 1
+            index = index % len(markers)
+            if index % max_plot == 0:
+                ax.set_ylabel("Predicted power (W)")
+                axes_index += 1
+        if len(data_with_predictions) > 0:
+            ax.set_ylabel("Predicted power (W)")
+            axes_index += 1
+    filename = os.path.join(output_folder, name + ".png")
+    plt.tight_layout()
+    fig.savefig(filename)
+    plt.close()
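
The new `power_curve_plot` walks the pipeline's node-type index, picks for each node type the weight-supported trainer with the lowest MAE, sweeps the `BPFOnly` CPU-time feature from 0 up to the scaler's observed maximum (`scaler.max_abs_[0]`), and draws one predicted-power curve per node type, grouped into subplots by `np.digitize` over histogram edges of the per-type maxima (hence `bin_size = len(bins) + 1`, since `digitize` can return indices 0 through `len(bins)`). A minimal sketch of calling it directly follows; the import path and all argument values (`std_v0.7`, `AbsPower`, `/data`) are illustrative assumptions, not taken from this commit.

```python
# Hypothetical driver for power_curve_plot; paths and names are placeholders.
from types import SimpleNamespace

from cmd_plot import power_curve_plot  # assumes cmd/ is on PYTHONPATH

args = SimpleNamespace(
    pipeline_name="std_v0.7",  # pipeline directory under data_path
    output_type="AbsPower",    # resolved via ModelOutputType[args.output_type]
)
power_curve_plot(
    args,
    data_path="/data",            # model top path holding the pipeline
    energy_source="intel_rapl",   # plots the "default_package_power" column
    output_folder="/data",
    name="power_curve",           # figure is saved as power_curve.png
)
```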
(Diffs for the remaining 32 changed files are not shown here.)
