Commit

Merge pull request #192 from ORNL/dev

Dev

renan-souza authored Jan 3, 2025
2 parents 60d7c15 + ea346d2 commit 4cb1385
Showing 38 changed files with 1,106 additions and 8,686 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/run-tests-py313.yml
@@ -0,0 +1,59 @@
name: Tests on Py313
on: [pull_request]

jobs:

  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ "3.13" ]
    env:
      MONGO_ENABLED: true
      LMDB_ENABLED: false
    timeout-minutes: 60
    if: "!contains(github.event.head_commit.message, 'CI Bot')"

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"

      - name: Show OS Info
        run: '[[ "$OSTYPE" == "linux-gnu"* ]] && { echo "OS Type: Linux"; (command -v lsb_release &> /dev/null && lsb_release -a) || cat /etc/os-release; uname -r; } || [[ "$OSTYPE" == "darwin"* ]] && { echo "OS Type: macOS"; sw_vers; uname -r; } || echo "Unsupported OS type: $OSTYPE"'

      - name: Start docker compose with redis
        run: make services-mongo

      - name: Upgrade pip
        run: |
          python -m pip install --upgrade pip
          python --version
      - name: Install dependencies that work on py3.13
        run: |
          pip install . --no-deps
          pip install flask-restful msgpack omegaconf pandas psutil py-cpuinfo redis requests pyarrow
          pip install .[mongo,analytics,dask,docs,kafka,mlflow,dev]
      - name: List installed packages
        run: pip list

      - name: Test with pytest and redis, ignoring the ones that (as of the day I am writing this) don't work on py3.13
        run: |
          pytest --ignore=tests/adapters/test_tensorboard.py --ignore tests/decorator_tests/ml_tests/
      - name: Shut down docker compose
        run: make services-stop-mongo

      - name: Clean up
        run: |
          make clean
          find /home/runner/runners/ -type f -name "*.log" -exec sh -c 'echo {}; >"{}"' \; || true
          docker image prune -a -f
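For local debugging, the same sequence can be approximated outside CI. A hedged sketch using only the commands from the workflow above (assumes a running Docker daemon and the repository's Makefile targets, run from the repo root):

    # Approximate the Py313 CI job locally (a sketch based on the steps above)
    make services-mongo        # start the backing services via docker compose
    pip install . --no-deps
    pip install flask-restful msgpack omegaconf pandas psutil py-cpuinfo redis requests pyarrow
    pip install .[mongo,analytics,dask,docs,kafka,mlflow,dev]
    pytest --ignore=tests/adapters/test_tensorboard.py --ignore tests/decorator_tests/ml_tests/
    make services-stop-mongo   # shut the services down again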
4 changes: 3 additions & 1 deletion .github/workflows/run-tests.yml
@@ -35,7 +35,9 @@ jobs:
        run: make services-mongo

      - name: Upgrade pip
        run: python -m pip install --upgrade pip
        run: |
          python -m pip install --upgrade pip
          python --version
      - name: Test examples
        run: bash .github/workflows/run_examples.sh examples true # with mongo
16 changes: 13 additions & 3 deletions .github/workflows/run_examples.sh
@@ -6,13 +6,14 @@ set -o pipefail

# Display usage/help message
usage() {
    echo -e "\nUsage: $0 <examples_dir> <with_mongo>\n"
    echo -e "\nUsage: $0 <examples_dir> <with_mongo> [tests]\n"
    echo "Arguments:"
    echo "  examples_dir   Path to the examples directory (Mandatory)"
    echo "  with_mongo     Boolean flag (true/false) indicating whether to include MongoDB support (Mandatory)"
    echo "  tests          Optional array of test cases to run (e.g., \"test1 test2 test3\")"
    echo -e "\nExample:"
    echo "  $0 examples true"
    echo "  $0 examples false"
    echo "  $0 examples false \"test1 test2\""
    exit 1
}

@@ -80,7 +81,16 @@ echo "Using examples directory: $EXAMPLES_DIR"
echo "With Mongo? ${WITH_MONGO}"

# Define the test cases
tests=("instrumented_simple" "instrumented_loop" "dask" "mlflow" "tensorboard" "single_layer_perceptron" "llm_complex/llm_main")
default_tests=("instrumented_simple" "instrumented_loop" "dask" "mlflow" "tensorboard" "single_layer_perceptron" "llm_complex/llm_main")

# Use the third argument if provided, otherwise use default tests
if [[ -n "$3" ]]; then
eval "tests=($3)"
else
tests=("${default_tests[@]}")
fi

echo "Running the following tests: ${tests[*]}"

# Iterate over the tests and run them
for test_ in "${tests[@]}"; do
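With the new optional third argument, a subset of the examples can be run. A hedged usage sketch, reusing test names from the default list above; note that eval "tests=($3)" word-splits the quoted argument into a bash array:

    # Run only two of the example tests, without MongoDB (a sketch; invoked from the repository root)
    bash .github/workflows/run_examples.sh examples false "instrumented_simple dask"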
1 change: 1 addition & 0 deletions .gitignore
@@ -22,3 +22,4 @@ tmp/
deployment/data
**/*output_data*
examples/llm_complex/input_data
tmp_tests/
20 changes: 11 additions & 9 deletions README.md
@@ -143,18 +143,20 @@ Data stored in MongoDB and LMDB are interchangeable. You can switch between them
## Performance Tuning for Performance Evaluation
In the settings.yaml file, the following variables might impact interception performance:
In the settings.yaml file, many variables may impact interception efficiency.
For fastest performance, configure the settings.yaml:
```yaml
main_redis:
  buffer_size: 50
  insertion_buffer_time_secs: 5
plugin:
  enrich_messages: false
```
```yaml
project:
  replace_non_json_serializable: false # Assumes that all captured data are JSON serializable
  db_flush_mode: offline # Disables runtime analysis in the database
mq:
  chunk_size: -1 # Disables chunking of the messages sent to the MQ. Use this only if the main memory of the compute nodes is large enough.
```
Also use the most lightweight capture option available for the adapter or instrumentation.
And other variables depending on the Plugin. For instance, in Dask, timestamp creation by workers add interception overhead. As we evolve the software, other variables that impact overhead appear and we might not stated them in this README file yet. If you are doing extensive performance evaluation experiments using this software, please reach out to us (e.g., create an issue in the repository) for hints on how to reduce the overhead of our software.
Other variables may also have an impact, depending on the adapter. For instance, in Dask, timestamp creation by workers adds interception overhead. As the software evolves, other variables that impact overhead may appear, and they might not be stated in this README file yet. If you are doing extensive performance evaluation experiments using this software, please reach out to us (e.g., create an issue in the repository) for hints on how to reduce its overhead.
## Install AMD GPU Lib
2 changes: 1 addition & 1 deletion examples/instrumented_loop_example.py
@@ -15,4 +15,4 @@
loop.end_iter({"item": item, "loss": loss})

docs = Flowcept.db.query(filter={"workflow_id": Flowcept.current_workflow_id})
assert len(docs) == iterations + 1 # The whole loop itself is a task
assert len(docs) == iterations
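The diff shows only the tail of this example. For context, a minimal sketch of how the full script plausibly reads, assuming flowcept's Flowcept and FlowceptLoop APIs; everything above loop.end_iter is reconstructed for illustration, not quoted from the repository:

    import random

    from flowcept import Flowcept, FlowceptLoop

    iterations = 3
    with Flowcept():  # starts capture; task documents land in the configured database
        loop = FlowceptLoop(range(iterations))  # wraps the iterable so each iteration is captured
        for item in loop:
            loss = random.random()  # hypothetical per-iteration metric
            loop.end_iter({"item": item, "loss": loss})

    docs = Flowcept.db.query(filter={"workflow_id": Flowcept.current_workflow_id})
    assert len(docs) == iterations  # after this commit: one task document per iteration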
35 changes: 35 additions & 0 deletions examples/llm_complex/custom_provenance_id_mapping.yaml
@@ -0,0 +1,35 @@
activity_id:
  epochs_loop_iteration:
    epoch: task['used']['epoch']
    model_train: parent['task_id']
  train_batch_iteration:
    query:
      collection: 'workflows'
      filter:
        generated.dataset_ref: primogenitor['used']['dataset_ref']
      projection: ['name', 'workflow_id', 'used.tokenizer_type', 'used.train_batch_size', 'used.subset_size', 'generated.train_n_batches' ]
    step: train
    batch: task['used']['i']
    data_path: primogenitor['used']['train_data_path']
    batch_size: primogenitor['used']['batch_size']
    epoch: parent['used']['epoch']
    model_train: grandparent['task_id']
  eval_batch_iteration:
    query:
      collection: 'workflows'
      filter:
        generated.dataset_ref: primogenitor['used']['dataset_ref']
      projection: ['name', 'workflow_id', 'used.tokenizer_type', 'used.eval_batch_size', 'used.subset_size', 'generated.eval_n_batches' ]
    step: eval
    batch: task['used']['i']
    data_path: primogenitor['used']['val_data_path']
    batch_size: primogenitor['used']['eval_batch_size']
    epoch: parent['used']['epoch']
    model_train: grandparent['task_id']
subtype:
  parent_forward:
    model: task['activity_id']
    extend: parent['custom_characterization']
  child_forward:
    module: task['activity_id']
    extend: parent['custom_characterization']