Skip to content

Commit

Permalink
time to fine tune again. Not sure if I should use previous fine tune …
Browse files Browse the repository at this point in the history
…or just start over.
  • Loading branch information
thesteve0 committed Dec 11, 2024
1 parent 0cb24b2 commit 9137cc2
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
/predictions_round2/
/sp_photos_yolo11/
10 changes: 10 additions & 0 deletions 10_final_predictions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import fiftyone as fo
import os
import tempfile
import torch
from ultralytics import YOLO

def _run():
    """Print the start and end markers of the script.

    This is a stub: no work is performed between the two messages yet.
    """
    print("starting")

    print("finished")


if __name__ == '__main__':
    _run()
8 changes: 2 additions & 6 deletions 6_fine_tuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import tempfile
import torch
from ultralytics import YOLO
# from ultralytics import settings
import fiftyone as fo
# import wandb
from wandb.sdk.verify.verify import PROJECT_NAME

"""
Expand All @@ -21,20 +19,18 @@
Requires `ultralytics` and `fiftyone>=0.25.0` to be installed.
---------------
Steve says:
"I was going to start with the imgsz =640x640 by resizing to 860x860 and then crop to 640x640. I can't
"//I was going to start with the imgsz =640x640 by resizing to 860x860 and then crop to 640x640//. I can't
do this because the image size below is just a magic number we set with the Yolo library. It says it
will just resize to that size but doesn't tell us how. So instead, to start with, I am just setting this to 640.
I am also setting the model size to nano, but will bump it up over time.
I am also setting the model size to nano, but will bump it up over time. Settled on X - it doesn't take much longer and does better
"""


# Presumably the name of the FiftyOne dataset to train from; its use is outside this view.
DATASET_NAME = 'labeled_dataset'
# YOLO model size; the module docstring notes the author started at nano and settled on "x".
DEFAULT_MODEL_SIZE = "x"
# Image size passed to the YOLO library (see the module docstring's note on imgsz).
DEFAULT_IMAGE_SIZE = 704
# Presumably the number of training epochs; confirm against the training call.
DEFAULT_EPOCHS = 12
# NOTE(review): this rebinds the PROJECT_NAME imported from wandb.sdk.verify.verify
# at the top of the file; that import looks unintended and only this value is in effect.
PROJECT_NAME = 'sp_photos_yolo11'

# wandb.require("core")


def get_torch_device():
Expand Down
57 changes: 39 additions & 18 deletions 8_cleaning_ground_truth_round2.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import fiftyone as fo
import argparse
from fiftyone import ViewField as F

"""
This script needs to accept arguments because we don't want to do all the tasks every time we run it.
* at the end means it happens in the app, not here in code
So what we do is:
0. load in the new dataset - "second_play_photos"
1. Create a new field for ground truth
2. One at a time, go through each of the categories and just view that category
3. If there is a mislabel, then fix it in the ground truth field.
4. Once all the images have been fixed for that category, head on to the next category
5. When finished all the categories, anything that has a blank ground truth field should have the ground
1. * Go through the samples and if the prediction is incorrect then add a tag with the correct field
2. After the first pass, look at all the untagged images and make sure they are correct:
```
from fiftyone import ViewField as F
view = dataset.match(F("tags") == [])
```
5. When finished with all the images, anything that has a blank tag should have the ground
truth field set = to the prediction field.
Then we are ready to do our final training/test
Then final predictions
Expand All @@ -29,28 +34,44 @@
dataset = fo.load_dataset(DATASET_NAME)

parser = argparse.ArgumentParser()
parser.add_argument("-g", help="specify to create ground truth field", action="store_true")
# parser.add_argument("-g", help="specify to create ground truth field", action="store_true")
parser.add_argument("-p", help="specify to move correct predictions to ground truth field",
action="store_true")
args = parser.parse_args()

if args.g:
print("make the field")
# if args.g:
# print("make the field")
# if dataset.has_field("ground_truth"):
# dataset.delete_sample_field("ground_truth")
# dataset.add_sample_field(
# "ground_truth",
# fo.EmbeddedDocumentField,
# embedded_doc_type=fo.Classification,
# )
# dataset.save()
if args.p:
# Add ground_truth label to dataset
if dataset.has_field("ground_truth"):
dataset.delete_sample_field("ground_truth")
dataset.add_sample_field(
# Must initialize new `Label` values via `set_values()`
# This basically creates a Classification for each sample in the data set which just contains an id
dataset.set_values(
"ground_truth",
fo.EmbeddedDocumentField,
embedded_doc_type=fo.Classification,
[fo.Classification() for _ in range(len(dataset))],
)
dataset.save()
elif args.p:
for sample in dataset.iter_samples(autosave=True):
if sample["ground_truth"] is None:
sample["ground_truth"] = sample["prediction"]

# Expression that grabs first tag if one exists, else falls back to the predicted label
# We need an expression for set_field
# This is basically saying "Test if tags is not empty; if that is true then return the first tag, else return the prediction label"
label_expr = (F("$tags") != []).if_else(
F("$tags")[0],
F("$prediction.label"),
)

# Apply changes
dataset.set_field("ground_truth.label", label_expr).save()
else:
print("you need to specify either -g or -p")
# print("you need to specify either -g or -p")
print("you need to specify -p")

print("done")

29 changes: 29 additions & 0 deletions 9_final_fine_tuning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import fiftyone as fo
import os
import tempfile
import torch
from ultralytics import YOLO

# Names for the two training rounds (presumably FiftyOne dataset names; confirm).
FIRST_TRAINING = "low_quality_first_labeled_dataset"
# Empty string for now: no second-round name has been filled in.
SECOND_TRAINING = ""

# Training defaults; how they are consumed is not visible in this stub.
DATASET_NAME = 'labeled_dataset'
# Presumably the YOLO model size suffix; confirm where the model is loaded.
DEFAULT_MODEL_SIZE = "x"
# Presumably the training image size; confirm against the training call.
DEFAULT_IMAGE_SIZE = 704
# Presumably the number of training epochs; confirm against the training call.
DEFAULT_EPOCHS = 12
# Presumably the project/output folder name for training runs; confirm.
PROJECT_NAME = 'sp_photos_yolo11'



def get_torch_device():
    """Return the preferred torch device available on this machine.

    The preference order is CUDA, then MPS, then CPU.

    Returns:
        torch.device: A ``cuda``, ``mps`` or ``cpu`` device.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")

    # ``torch.backends.mps`` is missing on older torch builds, so look it up
    # defensively rather than raising AttributeError on those installs.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")

    return torch.device("cpu")

def _run():
    """Print the start and end markers of the script.

    This is a stub: no work is performed between the two messages yet.
    """
    print("starting")

    print("finished")


if __name__ == '__main__':
    _run()
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
@@TODO I need to rename the datasets, and their references in code, to make more sense
The way to rename a dataset is
```python
import fiftyone as fo
dataset = fo.load_dataset("foo")
dataset.name = "footwo"
```

# voxel-photo-album

Here are the general steps we want to accomplish
Expand Down

0 comments on commit 9137cc2

Please sign in to comment.