Commit
all done - just need some cleanup of instructions and dataset names
thesteve0 committed Dec 17, 2024
1 parent eff09ae commit b71e4b0
Showing 3 changed files with 217 additions and 8 deletions.
112 changes: 110 additions & 2 deletions 10_final_predictions.py
@@ -1,10 +1,118 @@
import fiftyone as fo
import os
import tempfile
import torch
from fiftyone import Classification
from ultralytics import YOLO

"""
We have trained the model on the < 5% of the original data.
Now we are going to take this newly trained model and train it some more on another chunk of the data.
We are going to have the new model predict the new images and then correct it to the ground truth again.
First step is going to be splitting out more data that is not in the first training set.
Then run the new model over it producing the new predictions
"""

# This is the number we need to make our total sampled data = 10%
FIRST_TRAINING = "first_labeled_dataset"
SECOND_TRAINING = "second_labeled_dataset"
FINAL_OUTPUT = "final_predicted_photos"
IMAGE_SIZE = 704
MODEL_LOCATION = "/home/spousty/git/voxel-photo-album/sp_final_training_photos_yolo11/output/weights/best.pt"


def get_torch_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    elif torch.backends.mps.is_available():
        return torch.device("mps")
    else:
        return torch.device("cpu")


def load_data(datasetname) -> fo.Dataset:
    return fo.load_dataset(datasetname)


def split_again(dataset_with_orig, first_training) -> fo.Dataset:
    # Take a random sample from the original data, excluding the first set of sample data.
    # Sample ID is not generally a good unique ID. For FiftyOne, filepath is what you should use
    # when working with the same dataset over and over again.
    if "second_play_photos" in fo.list_datasets():
        fo.delete_dataset("second_play_photos")
    return dataset_with_orig.exclude_by("filepath", first_training.values("filepath"))


def run_predictions(dataset):
    model = YOLO(MODEL_LOCATION)  # load a custom model
    # naive_model = YOLO(f"yolo11x-cls.pt")

    # "Export" our sample images to disk as symlinks
    data_dir = tempfile.mkdtemp()
    dataset.export(export_dir=data_dir, dataset_type=fo.types.ImageDirectory, export_media="symlink")

    # Predict with the model
    results = model(
        source=data_dir,
        device=get_torch_device(),
        imgsz=IMAGE_SIZE,
        stream=True,
        # save=True,
        project="final_predictions"
    )

    return results


def extract_orig_path(dataset):
    sample = dataset.first()
    result = sample.filepath.replace(sample.filename, "")
    return result


def merge_all_training_data():
    first_data = fo.load_dataset(FIRST_TRAINING).view().clone()
    second_data_view = fo.load_dataset(SECOND_TRAINING).view()
    first_data.add_samples(second_data_view)
    return first_data


if __name__ == '__main__':
    print("starting")
    whole_dataset = load_data("photo_album")
    original_path = extract_orig_path(whole_dataset)
    all_training_data = merge_all_training_data()
    if FINAL_OUTPUT in fo.list_datasets():
        fo.delete_dataset(FINAL_OUTPUT)
    remaining_photos = split_again(whole_dataset, all_training_data).clone(FINAL_OUTPUT, persistent=True)
    print("about to predict")
    results = run_predictions(remaining_photos)

    # Convert the results into a dict so we can iterate through the dataset rather than
    # the results. This will allow for faster saves to the dataset rather than individual
    # sample-by-sample saves. The image's name without the path is the key.
    # This is causing an OOM kill - I think there are too many objects in each results object,
    # so keep only the label and confidence from each one.
    results_dict = {}
    for x in results:
        file_name = os.path.basename(x.path)
        label = x.names[x.probs.top1]
        confidence = float(round(x.probs.top1conf.item(), 2))
        results_dict[file_name] = {"label": label, "confidence": confidence}

    # Now for each sample in the new dataset, add the prediction
    for sample in remaining_photos.iter_samples(progress=True, autosave=True):
        res = results_dict[sample.filename]
        sample["prediction"] = Classification(label=res["label"],
                                              confidence=res["confidence"])

    # Display the new dataset and hold it open with a wait()
    # session = fo.launch_app(remaining_photos)
    # session.wait()
    print("Done")

    print("finished")
90 changes: 84 additions & 6 deletions 9_final_fine_tuning.py
@@ -4,15 +4,22 @@
import torch
from ultralytics import YOLO

FIRST_TRAINING = "low_quality_first_labeled_dataset"
SECOND_TRAINING = ""
FIRST_TRAINING = "first_labeled_dataset"
SECOND_TRAINING = "second_labeled_dataset"

DATASET_NAME = 'labeled_dataset'
DEFAULT_MODEL_SIZE = "x"
DATASET_NAME = ''
DEFAULT_MODEL_SIZE = "l"
DEFAULT_IMAGE_SIZE = 704
DEFAULT_EPOCHS = 12
PROJECT_NAME = 'sp_photos_yolo11'
DEFAULT_EPOCHS = 16
SAVE_RESULTS = True
PROJECT_NAME = 'sp_final_training_photos_yolo11'

def merge_datasets():
    first_data = fo.load_dataset(FIRST_TRAINING).clone()
    second_data = fo.load_dataset(SECOND_TRAINING)
    first_data.add_samples(second_data.view())
    print("Done Merging")
    return first_data


def get_torch_device():
@@ -23,8 +30,79 @@ def get_torch_device():
    else:
        return torch.device("cpu")

def train_classifier(
    dataset_name=None,
    model_size=DEFAULT_MODEL_SIZE,
    image_size=DEFAULT_IMAGE_SIZE,
    epochs=DEFAULT_EPOCHS,
    project_name="mislabel_confidence_noise",
    gt_field="ground_truth",
    train_split=None,
    test_split=None,
    **kwargs
):

    # settings.update({"wandb": False})
    if dataset_name:
        dataset = fo.load_dataset(dataset_name)
        dataset.take(round(0.3 * len(dataset))).tag_samples("test")
        dataset.match_tags("test", bool=False).tag_samples("train")
        train = dataset.match_tags("train")
        test = dataset.match_tags("test")
    else:
        train = train_split
        test = test_split

    if model_size is None:
        model_size = "s"
    elif model_size not in ["n", "s", "m", "l", "x"]:
        raise ValueError("model_size must be one of ['n', 's', 'm', 'l', 'x']")

    splits_dict = {
        "train": train,
        "val": test,
        "test": test,
    }

    data_dir = tempfile.mkdtemp()

    for key, split in splits_dict.items():
        split_dir = os.path.join(data_dir, key)
        os.makedirs(split_dir)
        split.export(
            export_dir=split_dir,
            dataset_type=fo.types.ImageClassificationDirectoryTree,
            label_field=gt_field,
            export_media="symlink",
        )

    # Load a pre-trained YOLO11 model for classification
    model = YOLO(f"yolo11{model_size}-cls.pt")

    # Train the model
    model.train(
        data=data_dir,  # Path to the dataset
        epochs=epochs,  # Number of epochs
        imgsz=image_size,  # Image size
        device=get_torch_device(),
        save=SAVE_RESULTS,
        name="output",
        exist_ok=True,
        project=project_name,
    )

    return model

if __name__ == '__main__':
    print("starting")
    merged_data = merge_datasets()
    train_classifier(
        dataset_name=merged_data.name,
        # model_size=args.model_size,
        # image_size=args.image_size,
        # epochs=args.epochs,
        project_name=PROJECT_NAME,
    )

"""
PREP DATA
23 changes: 23 additions & 0 deletions README.md
@@ -1,3 +1,26 @@
I got lost in the challenge of finding good classes and forgot my main goal.
My goal was to quickly find the raw images I wanted to work with for production.
This is not "given a photo of a dog, which breed is it?" This is "here is a general photo that could be of anything; do I
care about it?"

I only need minimal classes for that:
1. I could simply do - People vs Not people
2. Or I could do people, animal, plant, landscape, buildings

People means there is a person in the photo. Even if the people are small or there is only a single person in the picture,
it should be considered a people picture.

For the second scheme, classification will depend more on my deciding what the intended subject of the photo is. For example,
a plant with an insect centered on it would be animal.

There is an open question about how to handle photos that I am just not interested in, like a picture of a box, or a shot from
a doorway that just is not interesting enough to develop. There are no people in the picture, and it does not contain any of the classes I am interested in. I think this is basically teaching the model my concept of uninteresting.

Doing it with the 16 classes led to overlapping categories and unclean labeling. This in turn led to poor model performance,
but for "understandable" reasons. This was not a problem with the models but a problem with the data prep.

Markus gave the suggestion, which is a really good one, to run this in two stages. First, train a model for people vs. not-people. Then use that model's not-people output to train a multi-class model. A rough sketch is below.
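
A minimal sketch of what selecting the stage-two data could look like with FiftyOne. The `people_or_not` field, the `not_people` label, and the dataset names are placeholder assumptions, not code that exists in this repo yet:

```python
import fiftyone as fo
from fiftyone import ViewField as F

# Assumes the stage-one binary classifier has already written its
# predictions to a "people_or_not" field on every sample
dataset = fo.load_dataset("photo_album")

# Keep only the photos the first model decided contain no people
not_people = dataset.match(F("people_or_not.label") == "not_people")

# Clone them into their own dataset to feed the stage-two multi-class
# training (e.g. train_classifier in 9_final_fine_tuning.py)
stage_two_data = not_people.clone("not_people_photos")
```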

@@TODO I need to rename the datasets, and their references in code, to make more sense
The way to rename a dataset is
```python
dataset = fo.load_dataset("old_dataset_name")
dataset.name = "new_dataset_name"
```
