-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added upload and evaluation scripts with simple instructions
- Loading branch information
Showing
8 changed files
with
1,613 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. | ||
|
||
# dependencies | ||
/node_modules | ||
/.pnp | ||
.pnp.js | ||
|
||
# testing | ||
/coverage | ||
|
||
# next.js | ||
/.next/ | ||
/out/ | ||
|
||
# production | ||
/build | ||
|
||
# misc | ||
.DS_Store | ||
*.pem | ||
db.sqlite3 | ||
|
||
# debug | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
|
||
# local env files | ||
.env*.local | ||
/api/gl_config.json | ||
/api/*.json | ||
/api/external | ||
|
||
# vercel | ||
.vercel | ||
|
||
# typescript | ||
*.tsbuildinfo | ||
next-env.d.ts | ||
|
||
# python | ||
__pycache__ | ||
|
||
# vim | ||
*.swp | ||
|
||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,79 @@ | ||
# model-evaluation-tool | ||
A simple tool for evaluating the performance of your Groundlight ML model | ||
# Model Evaluation Tool | ||
A simple tool for evaluating the performance of your Groundlight Binary ML model | ||
|
||
## Installation | ||
|
||
The dependencies for this script can be installed using either poetry (recommended) or `requirements.txt`. | ||
|
||
Using poetry | ||
|
||
```bash | ||
poetry install | ||
``` | ||
|
||
Using `requirements.txt` | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
## Usage | ||
|
||
### Setting Up Your Account | ||
|
||
To train an ML model, make sure to create a binary detector on the [Online Dashboard](https://dashboard.groundlight.ai/). | ||
|
||
You will also need to create an API Token to start uploading images to the account. You can go [here](https://dashboard.groundlight.ai/reef/my-account/api-tokens) to create one. | ||
|
||
After you have created your API token, add the token to your terminal as an environment variable: | ||
|
||
```bash | ||
export GROUNDLIGHT_API_TOKEN="YOUR_API_TOKEN" | ||
``` | ||
|
||
### Formatting Dataset | ||
|
||
To train or evaluate the ML model with your custom dataset, structure your dataset into the following format: | ||
|
||
```bash | ||
└── dataset | ||
├── dataset.csv | ||
└── images | ||
├── 1.jpg | ||
├── 10.jpg | ||
├── 11.jpg | ||
├── 12.jpg | ||
├── 13.jpg | ||
├── 14.jpg | ||
``` | ||
|
||
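The `dataset.csv` file should have two columns: image_name and label (YES/NO). The scripts load it with the `pandas.read_csv` defaults, which treat the first row as a header, so begin the file with a header row (e.g. `image_name,label`); the data rows that follow look like this, for example: | ||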
|
||
```bash | ||
1.jpg,YES | ||
11.jpg,NO | ||
12.jpg,YES | ||
13.jpg,YES | ||
14.jpg,NO | ||
``` | ||
|
||
The corresponding image file should be placed inside the `images` folder. | ||
|
||
### Training the Detector | ||
|
||
To train the ML model for a detector, simply run the script `label-upload.py` with the following arguments: | ||
|
||
```bash | ||
poetry run python label-upload.py --detector-id YOUR_DETECTOR_ID --dataset PATH_TO_DATASET_TRAIN_FOLDER | ||
``` | ||
|
||
Optionally, set the `--delay` argument to prevent going over the throttling limit of your account. | ||
|
||
### Evaluate the Detector | ||
|
||
To evaluate the ML model performance for a detector, simply run the script `evaluate-accuracy.py` with the following arguments: | ||
|
||
```bash | ||
poetry run python evaluate-accuracy.py --detector-id YOUR_DETECTOR_ID --dataset PATH_TO_DATASET_TEST_FOLDER | ||
``` | ||
|
||
Optionally, set the `--delay` argument to prevent going over the throttling limit of your account. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
A script to evaluate the accuracy of a detector on a given dataset. | ||
It will upload the images to the detector and compare the predicted labels with the ground truth labels. | ||
You can specify the delay between uploads. | ||
""" | ||
|
||
import argparse | ||
import os | ||
import PIL | ||
import time | ||
import PIL.Image | ||
import pandas as pd | ||
import logging | ||
|
||
from groundlight import Groundlight, Detector, BinaryClassificationResult | ||
from tqdm.auto import tqdm | ||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
def upload_image(gl: Groundlight, detector: Detector, image: PIL.Image.Image) -> BinaryClassificationResult:
    """
    Submit an image to a detector and return the prediction for it.

    No label is attached to the image query here; this function is used for
    evaluation only.

    Args:
        gl: The Groundlight object.
        detector: The detector to query.
        image: The image to submit.
    Returns:
        The result of the image query. The caller reads its `label` (YES/NO)
        and `confidence` attributes.
    """

    # Images that were not decoded from a JPEG file (including in-memory
    # images, whose format is None) are converted to RGB before submission.
    if image.format != "JPEG":
        image = image.convert("RGB")

    # Use ask_ml to submit the image and hand back the result of the query
    iq = gl.ask_ml(detector=detector, image=image)
    return iq.result
|
||
|
||
def compute_metrics(
    true_positives: int,
    correct: int,
    false_positives: int,
    false_negatives: int,
    total_processed: int,
) -> tuple:
    """
    Compute accuracy, precision and recall from the evaluation counters.

    Args:
        true_positives: Images labelled YES that were predicted YES.
        correct: All correct predictions (true positives and true negatives).
        false_positives: Images labelled NO that were predicted YES.
        false_negatives: Images labelled YES that were predicted NO.
        total_processed: Number of images that were evaluated.
    Returns:
        A tuple (accuracy, precision, recall). Any metric whose denominator
        is zero is reported as 0.0.
    """
    accuracy = correct / total_processed if total_processed > 0 else 0.0

    # Precision and recall are defined on the positive (YES) class only, so
    # true negatives must not be counted in their numerators or denominators.
    predicted_yes = true_positives + false_positives
    actual_yes = true_positives + false_negatives
    precision = true_positives / predicted_yes if predicted_yes > 0 else 0.0
    recall = true_positives / actual_yes if actual_yes > 0 else 0.0

    return accuracy, precision, recall


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Evaluate the accuracy of a detector on a given dataset.")
    parser.add_argument("--detector-id", type=str, required=True, help="The ID of the detector to evaluate.")
    parser.add_argument("--dataset", type=str, required=True, help="The folder containing the dataset.csv and images folder")
    parser.add_argument("--delay", type=float, required=False, default=0.1, help="The delay between uploads.")
    args = parser.parse_args()

    gl = Groundlight()
    detector = gl.get_detector(args.detector_id)

    # Load the dataset from the CSV file and images from the images folder
    # The CSV file should have two columns: image_name and label (YES/NO)
    dataset = pd.read_csv(os.path.join(args.dataset, "dataset.csv"))
    images_dir = os.path.join(args.dataset, "images")
    # A set keeps the per-row existence check cheap for large datasets
    images = set(os.listdir(images_dir))

    logger.info(f"Evaluating {len(dataset)} images on detector {detector.name} with delay {args.delay}.")

    # Record the number of correct predictions, split out the true positives
    # (needed for precision/recall), and count false positives and negatives
    correct = 0
    true_positives = 0
    total_processed = 0
    false_positives = 0
    false_negatives = 0
    confidence_sum = 0.0
    confidence_count = 0

    for image_name, label in tqdm(dataset.values):
        if image_name not in images:
            logger.warning(f"Image {image_name} not found in images folder.")
            continue

        if label not in ["YES", "NO"]:
            logger.warning(f"Invalid label {label} for image {image_name}. Skipping.")
            continue

        # The context manager closes the underlying file once the image has been submitted
        with PIL.Image.open(os.path.join(images_dir, image_name)) as image:
            result = upload_image(gl=gl, detector=detector, image=image)

        if result.label == label:
            correct += 1
            if label == "YES":
                true_positives += 1
        elif result.label == "YES" and label == "NO":
            false_positives += 1
        elif result.label == "NO" and label == "YES":
            false_negatives += 1

        # NOTE(review): guards against a result that carries no confidence
        # value - confirm against the SDK whether that can happen for ask_ml.
        if result.confidence is not None:
            confidence_sum += result.confidence
            confidence_count += 1
        total_processed += 1

        time.sleep(args.delay)

    # Calculate the accuracy, precision, and recall
    accuracy, precision, recall = compute_metrics(
        true_positives=true_positives,
        correct=correct,
        false_positives=false_positives,
        false_negatives=false_negatives,
        total_processed=total_processed,
    )
    # Avoid a ZeroDivisionError when every row was skipped
    average_confidence = confidence_sum / confidence_count if confidence_count > 0 else 0.0

    logger.info(f"Processed {total_processed} images.")
    logger.info(f"Correct: {correct}/{total_processed}")
    logger.info(f"Average Confidence: {average_confidence:.2f}")
    logger.info(f"False Positives: {false_positives}")
    logger.info(f"False Negatives: {false_negatives}")
    logger.info(f"Accuracy: {accuracy:.2f}")
    logger.info(f"Precision: {precision:.2f}")
    logger.info(f"Recall: {recall:.2f}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
A script to upload frames with labels to a detector in a controlled manner. | ||
You can specify the delay between uploads. | ||
""" | ||
|
||
import argparse | ||
import os | ||
import PIL | ||
import time | ||
import PIL.Image | ||
import pandas as pd | ||
import logging | ||
|
||
from groundlight import Groundlight, Detector | ||
from tqdm.auto import tqdm | ||
|
||
logger = logging.getLogger(__name__) | ||
logging.basicConfig(level=logging.INFO) | ||
|
||
|
||
def upload_image(gl: Groundlight, detector: Detector, image: PIL.Image.Image, label: str) -> None:
    """
    Upload an image with a label to a detector.

    Args:
        gl: The Groundlight object.
        detector: The detector to upload to.
        image: The image to upload.
        label: The label to upload, either "YES" or "NO".
    Raises:
        ValueError: If the label is neither "YES" nor "NO".
    """

    # Validate first so a bad label fails before any image work or upload
    if label not in ["YES", "NO"]:
        raise ValueError(f"Invalid label: {label}, must be 'YES' or 'NO'.")

    # Images that were not decoded from a JPEG file (including in-memory
    # images, whose format is None) are converted to RGB before submission.
    if image.format != "JPEG":
        image = image.convert("RGB")

    # Use ask_ml to upload the image and then add the label to the image query
    iq = gl.ask_ml(detector=detector, image=image)
    gl.add_label(image_query=iq, label=label)
|
||
|
||
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Upload images with labels to a detector.")
    parser.add_argument("--detector-id", type=str, required=True, help="The ID of the detector to upload to.")
    parser.add_argument("--dataset", type=str, required=True, help="The folder containing the dataset.csv and images folder")
    parser.add_argument("--delay", type=float, required=False, default=0.1, help="The delay between uploads.")
    args = parser.parse_args()

    gl = Groundlight()
    detector = gl.get_detector(args.detector_id)

    # Load the dataset from the CSV file and images from the images folder
    # The CSV file should have two columns: image_name and label (YES/NO)
    dataset = pd.read_csv(os.path.join(args.dataset, "dataset.csv"))
    images_dir = os.path.join(args.dataset, "images")
    # A set keeps the per-row existence check cheap for large datasets
    images = set(os.listdir(images_dir))

    logger.info(f"Uploading {len(dataset)} images to detector {detector.name} with delay {args.delay}.")

    for image_name, label in tqdm(dataset.values):
        if image_name not in images:
            logger.warning(f"Image {image_name} not found in images folder.")
            continue

        # Skip bad rows instead of letting upload_image raise and abort a
        # partially completed upload run.
        if label not in ["YES", "NO"]:
            logger.warning(f"Invalid label {label} for image {image_name}. Skipping.")
            continue

        # The context manager closes the underlying file once the image has been uploaded
        with PIL.Image.open(os.path.join(images_dir, image_name)) as image:
            upload_image(gl=gl, detector=detector, image=image, label=label)
        time.sleep(args.delay)

    logger.info("Upload complete. Please wait around 10 minutes for the detector to retrain.")
Oops, something went wrong.