Skip to content

Commit

Permalink
Merge pull request #199 from ORNL/ml_epochs
Browse files Browse the repository at this point in the history
Ml epochs
  • Loading branch information
renan-souza authored Jan 13, 2025
2 parents a1f51cf + 8a5e98b commit e6b164a
Show file tree
Hide file tree
Showing 34 changed files with 608 additions and 219 deletions.
22 changes: 9 additions & 13 deletions .github/workflows/run-checks.yml → .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,26 @@
name: Linter, formatter, and docs checks
on: [pull_request]

permissions:
contents: read
on: pull_request

jobs:
build:
runs-on: ubuntu-latest
if: "!contains(github.event.head_commit.message, 'CI Bot')"

checks:
runs-on: ubuntu-22.04
if: "!contains(github.event.head_commit.message, 'CI Bot')"
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 1

- name: Set up Python 3.10
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.10"
python-version: "3.12"
cache: "pip"

- name: Install package and dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ruff
python -m pip install .[docs]
pip install --upgrade pip
pip install ruff
pip install .[docs]
- name: Run linter and formatter checks using ruff
run: make checks
Expand Down
52 changes: 52 additions & 0 deletions .github/workflows/run-llm-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: LLM Tests
on: [pull_request]

jobs:

build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.10", "3.11", "3.12" ]
env:
MONGO_ENABLED: true
LMDB_ENABLED: false
timeout-minutes: 60
if: "!contains(github.event.head_commit.message, 'CI Bot')"

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 1

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"

- name: Show OS Info
  # Written as an explicit if/elif/else: in a chained `A && {..} || B && {..} || C`
  # list, && and || have equal precedence and associate left, so the macOS
  # branch would also run after the Linux branch had succeeded.
  run: |
    if [[ "$OSTYPE" == "linux-gnu"* ]]; then
      echo "OS Type: Linux"
      (command -v lsb_release &> /dev/null && lsb_release -a) || cat /etc/os-release
      uname -r
    elif [[ "$OSTYPE" == "darwin"* ]]; then
      echo "OS Type: macOS"
      sw_vers
      uname -r
    else
      echo "Unsupported OS type: $OSTYPE"
    fi

- name: Start docker compose with mongo
run: make services-mongo

- name: Upgrade pip
run: |
python -m pip install --upgrade pip
python --version
- name: Test LLM
run: bash .github/workflows/run_examples.sh examples true llm_complex/llm_test_runner.py

- name: Shut down docker compose
run: make services-stop-mongo

- name: Clean up
run: |
make clean
find /home/runner/runners/ -type f -name "*.log" -exec sh -c 'echo {}; >"{}"' \; || true
docker image prune -a -f
- name: List large files
run: find . -type f -exec du -h {} + | sort -h
9 changes: 5 additions & 4 deletions .github/workflows/run_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fi

# Function to run tests with common steps
run_test() {
test_path="${EXAMPLES_DIR}/${1}_example.py"
test_path="${EXAMPLES_DIR}/${1}"
test_type="$1"
with_mongo="$2"
echo "Test type=${test_type}"
Expand All @@ -39,6 +39,8 @@ run_test() {
pip install .[mongo] > /dev/null 2>&1
fi


# The following block is only needed to install special dependencies.
if [[ "$test_type" =~ "mlflow" ]]; then
echo "Installing mlflow"
pip install .[mlflow] > /dev/null 2>&1
Expand All @@ -53,6 +55,7 @@ run_test() {
pip install .[ml_dev] > /dev/null 2>&1
elif [[ "$test_type" =~ "llm_complex" ]]; then
echo "Installing ml_dev dependencies"
pip install .[dask] > /dev/null 2>&1
pip install .[ml_dev]
echo "Defining python path for llm_complex..."
export PYTHONPATH=$PYTHONPATH:${EXAMPLES_DIR}/llm_complex
Expand All @@ -62,15 +65,13 @@ run_test() {
echo "Running $test_path ..."
python "$test_path" | tee output.log
echo "Ok, ran $test_path."
# Check for errors in the output
if grep -iq "error" output.log; then
echo "Test $test_path failed! See output.log for details."
exit 1
fi

echo "Great, no errors to run $test_path."

# Clean up the log file
rm output.log
}

Expand All @@ -81,7 +82,7 @@ echo "Using examples directory: $EXAMPLES_DIR"
echo "With Mongo? ${WITH_MONGO}"

# Define the test cases
default_tests=("instrumented_simple" "instrumented_loop" "dask" "mlflow" "tensorboard" "single_layer_perceptron" "llm_complex/llm_main")
default_tests=("instrumented_simple_example.py" "instrumented_loop_example.py" "distributed_consumer_example.py" "dask_example.py" "mlflow_example.py" "tensorboard_example.py" "single_layer_perceptron_example.py" "llm_complex/llm_main_example.py")

# Use the third argument if provided, otherwise use default tests
if [[ -n "$3" ]]; then
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
**/*build*
**/*egg*
**/*pycache*
**/*dist*
#**/*dist*
**/*mlflow.db*
**/*mnist*
**/*tensorboard_events*
Expand All @@ -23,3 +23,4 @@ deployment/data
**/*output_data*
examples/llm_complex/input_data
tmp_tests/
nohup.out
8 changes: 2 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ clean:
find . -type f -name "*.pth" -exec rm -f {} \; || true
find . -type f -name "mlflow.db" -exec rm -f {} \; || true
find . -type d -name "mlruns" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "mlruns" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "__pycache__" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "*tfevents*" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "*output_data*" -exec rm -rf {} \; 2>/dev/null || true
# sphinx-build -M clean docs docs/_build This needs to be fixed.
find . -type f -name "*nohup*" -exec rm -rf {} \; 2>/dev/null || true
sphinx-build -M clean docs docs/_build > /dev/null 2>&1 || true

# Build the HTML documentation using Sphinx
.PHONY: docs
Expand Down Expand Up @@ -96,7 +96,3 @@ tests:
.PHONY: tests-notebooks
tests-notebooks:
pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb

.PHONY: tests-all
tests-all:
pytest
76 changes: 76 additions & 0 deletions examples/distributed_consumer_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import subprocess
import uuid
from time import sleep
from flowcept import Flowcept, FlowceptTask

def execute_cmd(command: str) -> int:
    """
    Launch a shell command in the background with ``nohup`` and report its PID.

    The command's stdout and stderr are redirected to ``/dev/null``. The
    command line is run through ``/bin/bash``.

    Parameters
    ----------
    command : str
        The command to be executed.

    Returns
    -------
    int
        The PID printed by the shell for the background job, or ``-1`` when
        the launching shell exits with a non-zero status.
    """
    # `$!` is the PID of the most recent background job; echoing it is how the
    # PID travels back to this process through the shell's stdout.
    nohup_command = f"nohup {command} > /dev/null 2>&1 & echo $!"
    print(f"Executing: {nohup_command}")

    try:
        completed = subprocess.run(
            nohup_command,
            shell=True,
            check=True,
            executable="/bin/bash",
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error executing command: {command}\n{e}")
        return -1

    # The only thing written to stdout is the echoed PID.
    pid = int(completed.stdout.strip())
    print(f"Started process with PID: {pid}")
    return pid


def kill_process(pid: int) -> None:
    """
    Kill a single process by its PID.

    Failures are reported on stdout instead of being raised, so the caller can
    treat this as a best-effort cleanup step.

    Parameters
    ----------
    pid : int
        The PID of the process to be killed. Must be a positive integer;
        non-positive values are rejected without sending any signal.
    """
    # os.kill gives non-positive PIDs a special meaning: 0 targets the
    # caller's process group and -1 targets every process the user is allowed
    # to signal. A failure sentinel such as -1 must never reach os.kill.
    if pid <= 0:
        print(f"Refusing to kill invalid PID: {pid}.")
        return

    try:
        os.kill(pid, 9)  # Send SIGKILL to the process
    except ProcessLookupError:
        print(f"No process found with PID: {pid}.")
    except PermissionError:
        print(f"Permission denied to kill PID: {pid}.")
    else:
        print(f"Process {pid} killed successfully.")


def simple_flowcept_task(workflow_id):
    """
    Record one minimal Flowcept task under the given workflow.

    A ``Flowcept`` context is opened with ``start_persistence=False`` and with
    ``workflow_id`` used both as the workflow id and as the bundle execution
    id. Inside it, a single ``FlowceptTask`` is created with ``{"a": 1}`` as
    its used data and ended with ``{"b": 2}`` as its generated data.

    Parameters
    ----------
    workflow_id : str
        Identifier passed to Flowcept as ``workflow_id`` and ``bundle_exec_id``.
    """
    with Flowcept(
        start_persistence=False,
        workflow_id=workflow_id,
        bundle_exec_id=workflow_id,
    ), FlowceptTask(used={"a": 1}) as task:
        task.end(generated={"b": 2})


if __name__ == "__main__":
    # One fresh id ties together the background consumer, the task and the
    # final database query.
    workflow_id = str(uuid.uuid4())
    print(workflow_id)

    # Run the consumption services in a separate background process.
    pid = execute_cmd(f"python -c 'from flowcept import Flowcept; Flowcept.start_consumption_services(\"{workflow_id}\")'")
    if pid <= 0:
        # execute_cmd signals failure with -1; passing that on to os.kill
        # would signal every process the user owns, so stop here instead.
        raise RuntimeError("Could not start the Flowcept consumption services.")

    try:
        sleep(1)  # presumably lets the consumer process come up first

        simple_flowcept_task(workflow_id)

        sleep(15)  # Give enough time for the consumer services to do their thing
    finally:
        # Always stop the background consumer, even if the task above failed,
        # so no orphaned process is left behind.
        kill_process(pid)

    # Exactly one task was emitted, so exactly one should be found.
    tasks = Flowcept.db.query({"workflow_id": workflow_id})
    assert len(tasks) == 1
    print(tasks)
Loading

0 comments on commit e6b164a

Please sign in to comment.