Skip to content

Commit

Permalink
more progress
Browse files Browse the repository at this point in the history
  • Loading branch information
thesteve0 committed Dec 10, 2024
1 parent cdd6a71 commit 0cb24b2
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 10 deletions.
6 changes: 6 additions & 0 deletions 5_clean_ground_truth.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import fiftyone as fo

"""
Almost all of this step happens in the client (at least it did the first time). It will need to be done
again, so this doc will be updated as appropriate.
"""


if __name__ == "__main__":
    # The script itself only loads the dataset; per the module docstring,
    # almost all of the cleaning happens in the client.
    dataset_name = "labeled_dataset"
    fo.load_dataset(dataset_name)
38 changes: 29 additions & 9 deletions 7_new_predictions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fiftyone as fo
import tempfile
import torch
from safetensors.torch import save_model
from fiftyone import Classification
from ultralytics import YOLO

"""
Expand Down Expand Up @@ -33,8 +33,11 @@ def load_data(datasetname) -> fo.Dataset:

def split_again(dataset_with_orig, first_training) -> fo.Dataset:
    """Take a random sample of the original data that skips the first training set.

    Any existing "second_play_photos" dataset is deleted first, then the
    selected samples are cloned into a new persistent dataset of that name.

    Args:
        dataset_with_orig: collection holding the original data.
        first_training: collection whose samples must be excluded from the
            new selection.

    Returns:
        The persistent "second_play_photos" dataset produced by cloning the
        ``take(SAMPLES_TO_TAKE)`` selection.
    """
    clone_name = "second_play_photos"

    # Remove the output of a previous run so the clone below can reuse the name
    if clone_name in fo.list_datasets():
        fo.delete_dataset(clone_name)

    # Sample ID is not generally a good unique ID. For FiftyOne, filepath is
    # what you should use when working with the same dataset over and over
    # again, so the exclusion is keyed on filepath.
    already_used = first_training.values("filepath")
    candidates = dataset_with_orig.exclude_by("filepath", already_used)
    return candidates.take(SAMPLES_TO_TAKE).clone(clone_name, persistent=True)

def run_predictions(dataset):
model = YOLO(MODEL_LOCATION) # load a custom model
Expand All @@ -50,7 +53,7 @@ def run_predictions(dataset):
device=get_torch_device(),
imgsz=IMAGE_SIZE,
stream=True,
save=True,
# save=True,
project="predictions_round2",
name="write_something"
)
Expand All @@ -68,19 +71,36 @@ def run_predictions(dataset):
# return [results, naive_results]
return results

def extract_orig_path(dataset):
    """Return the first sample's filepath with its trailing filename removed.

    The separator that preceded the filename is kept, so the result ends
    with it (for example ``"/data/photos/"``).

    Args:
        dataset: object whose ``first()`` returns a sample exposing
            ``filepath`` and ``filename`` attributes.

    Returns:
        The filepath string without the filename at its end. If the filepath
        does not end with the filename it is returned unchanged.
    """
    sample = dataset.first()
    filepath, filename = sample.filepath, sample.filename
    # Strip only the trailing occurrence: str.replace() would also delete any
    # earlier path component that happens to contain the filename.
    if filename and filepath.endswith(filename):
        return filepath[:-len(filename)]
    return filepath


if __name__ == '__main__':
    # Local import: used only by this entry point to key results by file name
    import os

    print("starting")
    whole_dataset = load_data("photo_album")
    # Directory prefix of the original images; computed here but not used yet
    original_path = extract_orig_path(whole_dataset)
    first_training = load_data("labeled_dataset")
    candidate_data = split_again(whole_dataset, first_training)
    print("about to predict")
    results = run_predictions(candidate_data)

    # Convert the results into a dict so we can iterate through the dataset
    # rather than the results. This allows batched saves to the dataset
    # instead of individual sample-by-sample saves. The key is the image's
    # file name without the path.
    # NOTE: `results` must be iterated exactly once, here. run_predictions
    # passes stream=True, which presumably makes it a generator; an earlier
    # loop over it would leave this dict empty.
    results_dict = {os.path.basename(res.path): res for res in results}

    # Now for each sample in the new dataset, add the prediction
    for sample in candidate_data.iter_samples(progress=True, autosave=True):
        res = results_dict.get(sample.filename)
        if res is None:
            # Keep going instead of aborting the whole run on one missing result
            print(f"no prediction found for {sample.filename}, skipping")
            continue
        sample["prediction"] = Classification(
            label=res.names[res.probs.top1],
            confidence=round(res.probs.top1conf.item(), 2),
        )

    # Display new dataset and hold it open with a wait()
    session = fo.launch_app(candidate_data)
    session.wait()
    print("Done")


56 changes: 56 additions & 0 deletions 8_cleaning_ground_truth_round2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import fiftyone as fo
import argparse

"""
This script needs to accept arguments because we don't want to do all the tasks every time we run it.
* at the end means it happens in the app, not here in code
So what we do is:
0. load in the new dataset - "second_play_photos"
1. Create a new field for ground truth
2. One at a time, go through each of the categories and just view that category
3. If there is a mislabel, then fix it in the ground truth field.
4. Once all the images have been fixed for that category, head on to the next category
5. When all the categories are finished, anything that still has a blank ground truth field should have the
ground truth field set equal to the prediction field.
Then we are ready to do our final training/test
Then final predictions
To do the ground_truth updates use the console
import fiftyone as fo
dataset = fo.load_dataset("second_play_photos")
session = fo.launch_app(dataset)
"""
DATASET_NAME = "second_play_photos"

if __name__ == '__main__':
    print("starting")

    # Parse the command line first so that -h and a flagless run do not need
    # to load the dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", help="specify to create ground truth field", action="store_true")
    parser.add_argument("-p", help="specify to move correct predictions to ground truth field",
                        action="store_true")
    args = parser.parse_args()

    if not (args.g or args.p):
        print("you need to specify either -g or -p")
    else:
        dataset = fo.load_dataset(DATASET_NAME)

        if args.g:
            # (Re)create an empty ground_truth field. An existing field is
            # deleted first, which discards any labels already stored in it.
            print("make the field")
            if dataset.has_field("ground_truth"):
                dataset.delete_sample_field("ground_truth")
            dataset.add_sample_field(
                "ground_truth",
                fo.EmbeddedDocumentField,
                embedded_doc_type=fo.Classification,
            )
            dataset.save()
        elif not dataset.has_field("ground_truth"):
            # -p reads the field on every sample, so it has to exist already
            print("ground_truth field does not exist yet, run with -g first")
        else:
            # Every sample whose ground truth is still blank gets the
            # prediction copied over.
            for sample in dataset.iter_samples(autosave=True):
                if sample["ground_truth"] is None:
                    sample["ground_truth"] = sample["prediction"]

    print("done")

22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,24 @@ Here are the general steps we want to accomplish

## Order to run things
1. First run make_dataset.py to make the dataset in FiftyOne
2. Now run make_embeddings.py to create all the embeddings
2. Now run make_embeddings.py to create all the embeddings

#### The classes I am going for
"boy"
"girl"
"man"
"woman"
"people"
"dog"
"cat"
"bird"
"insect"
"monkey"
"crustacean",
"fish"
"animal"
"plant"
"flower"
"landscape"
"architecture"
"not an animal, plant, landscape, person, or building"

0 comments on commit 0cb24b2

Please sign in to comment.