Structure report data

lightly-ai · Nov 8, 2023 · 0498841
1 parent f2a59f8
commit 0498841
Show file tree

Hide file tree

Showing 4 changed files with 147 additions and 87 deletions.
diff --git a/compute_prototype.py b/example.py
@@ -8,7 +8,7 @@
 def main() -> None:
     # Analyze an image folder.
     image_folder = Path("/Users/michal/datasets/aquarium_predictions")
-    image_insights_data = analyze.analyze_images(image_folder=image_folder)
+    image_analysis = analyze.analyze_images(image_folder=image_folder)
 
     # Analyze object detections.
     label_folder = Path(
@@ -18,14 +18,14 @@ def main() -> None:
         input_folder=label_folder,
         images_rel_path="../../..",
     )
-    od_insights_data = analyze.analyze_object_detections(label_input=label_input)
+    od_analysis = analyze.analyze_object_detections(label_input=label_input)
 
     # Create HTML report.
     output_folder = Path("/Users/michal/tmp/lightly_insights_output")
     present.create_html_report(
         output_folder=output_folder,
-        image_data=image_insights_data,
-        od_data=od_insights_data,
+        image_analysis=image_analysis,
+        od_analysis=od_analysis,
     )
 
 

diff --git a/src/lightly_insights/analyze.py b/src/lightly_insights/analyze.py
@@ -46,6 +46,7 @@ def analyze_images(image_folder: Path) -> ImageAnalysis:
 
     # Param: Recursive?
     # Param: Subsample?
+    # TODO: Support all image types, not only "*.jpg".
     sorted_paths = sorted(image_folder.glob("*.jpg"))
     for image_path in sorted_paths:
         num_images += 1

diff --git a/src/lightly_insights/present.py b/src/lightly_insights/present.py
@@ -1,8 +1,9 @@
 import random
 import shutil
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Counter, Dict
+from typing import Any, Counter, Dict, List, Tuple
 
 from jinja2 import Environment, FileSystemLoader
 
@@ -12,27 +13,82 @@
 static_folder = Path(__file__).parent / "static"
 
 
+@dataclass(frozen=True)
+class SampleImage:
+    filename: str
+    path: Path
+
+
+@dataclass(frozen=True)
+class ImageInsights:
+    # Image sizes.
+    median_size: Tuple[int, int]
+    image_sizes: List[Tuple[Tuple[int, int], int]]
+    image_size_plot: str
+
+    # Sample images.
+    sample_images: List[SampleImage]
+
+
+@dataclass(frozen=True)
+class ObjectInsights:
+    # Object sizes.
+    average_size: Tuple[float, float]
+    object_size_abs_plot: str
+    object_size_rel_plot: str
+    objects_per_image_plot: str
+
+    # Hack.
+    id: str
+    class_name: str
+    num_objects: int
+
+
+@dataclass(frozen=True)
+class ObjectDetectionInsights:
+    # Image filenames.
+    num_images_no_label: int
+    num_images_zero_objects: int
+    sample_filenames_no_label: List[str]
+    sample_filenames_zero_objects: List[str]
+
+    num_classes: int
+    class_histogram: List[Tuple[str, int]]
+    total: ObjectInsights
+    classes: List[ObjectInsights]
+
+
+# @dataclass(frozen=True)
+# class ReportData:
+#     image_analysis: ImageAnalysis
+#     object_detection_analysis: ObjectDetectionAnalysis
+#     date_generated: str
+#     image_insights: ImageInsights
+#     object_detection_insights: ObjectDetectionInsights
+
+
 def create_html_report(
     output_folder: Path,
-    image_data: ImageAnalysis,
-    od_data: ObjectDetectionAnalysis,
+    image_analysis: ImageAnalysis,
+    od_analysis: ObjectDetectionAnalysis,
 ) -> None:
     output_folder.mkdir(parents=True, exist_ok=True)
 
-    image_props = get_image_props(
+    image_insights = _get_image_insights(
         output_folder=output_folder,
-        image_data=image_data,
+        image_analysis=image_analysis,
     )
-    od_props = get_object_detection_props(
+    object_detection_insights = _get_object_detection_insights(
         output_folder=output_folder,
-        od_data=od_data,
+        od_analysis=od_analysis,
+    )
+    report_data = dict(
+        image_analysis=image_analysis,
+        object_detection_analysis=od_analysis,
+        date_generated=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        image_insights=image_insights,
+        object_detection_insights=object_detection_insights,
     )
-    report_props = {
-        "image": image_props,
-        "object_detection": od_props,
-        # Now.
-        "date_generated": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-    }
 
     # Setup Jinja2 environment
     env = Environment(
@@ -41,7 +97,7 @@ def create_html_report(
     template = env.get_template("report.html")
 
     # Render the template with data
-    html_output = template.render(report_props)
+    html_output = template.render(report_data)
 
     # Write the HTML to file
     html_output_path = output_folder / "report.html"
@@ -54,14 +110,14 @@ def create_html_report(
     shutil.copytree(src=static_folder, dst=output_static_folder)
 
 
-def get_image_props(
+def _get_image_insights(
     output_folder: Path,
-    image_data: ImageAnalysis,
-) -> Dict[str, Any]:
+    image_analysis: ImageAnalysis,
+) -> ImageInsights:
     # Image size plot.
     plots.width_heigth_pixels_plot(
         output_file=output_folder / "image_size_plot.png",
-        size_histogram=image_data.image_sizes,
+        size_histogram=image_analysis.image_sizes,  # type: ignore[arg-type]
         title="Image Sizes",
     )
 
@@ -70,84 +126,86 @@ def get_image_props(
     sample_images = []
     # TODO: Do this more efficiently.
     rng = random.Random(42)
-    selection = rng.sample(sorted(list(image_data.filename_set)), k=8)
+    selection = rng.sample(sorted(list(image_analysis.filename_set)), k=8)
     for filename in selection:
         shutil.copy2(
-            src=image_data.image_folder / filename, dst=sample_folder / filename
+            src=image_analysis.image_folder / filename, dst=sample_folder / filename
         )
         sample_images.append(
-            {
-                "filename": filename,
-                "path": sample_folder / filename,
-            }
+            SampleImage(filename=filename, path=Path("./sample") / filename)
         )
 
-    return {
-        "raw": image_data,
-        "num_images": image_data.num_images,
-        "image_sizes": image_data.image_sizes.most_common(),
-        "filename_sample": list(image_data.filename_set)[:10],
-        "image_size_plot": "image_size_plot.png",
-        "sample_images": sample_images,
-    }
+    return ImageInsights(
+        median_size=(9999, 999),
+        image_sizes=list(image_analysis.image_sizes.most_common()),
+        image_size_plot="image_size_plot.png",
+        sample_images=sample_images,
+    )
 
 
-def get_object_detection_props(
+def _get_object_detection_insights(
     output_folder: Path,
-    od_data: ObjectDetectionAnalysis,
-) -> Dict[str, Any]:
+    od_analysis: ObjectDetectionAnalysis,
+) -> ObjectDetectionInsights:
     plots.width_heigth_pixels_plot(
         output_file=output_folder / "object_size_abs_plot.png",
-        size_histogram=od_data.total.object_sizes_abs,
+        size_histogram=od_analysis.total.object_sizes_abs,
         title="Object Sizes in Pixels",
     )
 
     plots.width_heigth_percent_plot(
         output_file=output_folder / "object_size_rel_plot.png",
-        size_histogram=od_data.total.object_sizes_rel,
+        size_histogram=od_analysis.total.object_sizes_rel,
         title="Object Sizes in Percent",
     )
 
     plots.objects_per_image_plot(
         output_file=output_folder / "objects_per_image_plot.png",
-        objects_per_image=od_data.total.objects_per_image,
+        objects_per_image=od_analysis.total.objects_per_image,
         title="Objects per Image",
     )
 
     # Class histogram.
     class_histogram = Counter[str]()
-    for class_name, class_data in od_data.classes.items():
+    for class_name, class_data in od_analysis.classes.items():
         class_histogram[class_name] += class_data.num_objects
 
     # Class plots.
     class_plot_folder = output_folder / "classes"
     class_plot_folder.mkdir(parents=True, exist_ok=True)
-    for class_name, object_analysis in od_data.classes.items():
+    for class_name, object_analysis in od_analysis.classes.items():
         plots.objects_per_image_plot(
             output_file=class_plot_folder / f"class{hash(class_name)}.png",
             objects_per_image=object_analysis.objects_per_image,
             title="Objects per Image",
         )
 
-    return {
-        "num_images": od_data.num_images,
-        "filename_sample": list(od_data.filename_set)[:10],
-        "num_objects": od_data.total.num_objects,
-        "objects_per_image": od_data.total.objects_per_image.most_common(),
-        "object_size_abs_plot": "object_size_abs_plot.png",
-        "object_size_rel_plot": "object_size_rel_plot.png",
-        "objects_per_image_plot": "objects_per_image_plot.png",
-        "num_classes": len(od_data.classes),
-        "class_histogram": class_histogram.most_common(),
-        "classes": [
-            {
-                "id": "class"
-                + str(hash(class_name) % 1000000),  # TODO: Use a better ID.
-                "name": class_name,
-                "num_objects": class_data.num_objects,
-                "objects_per_image": class_data.objects_per_image.most_common(),
-                "objects_per_image_plot": f"classes/class{hash(class_name)}.png",
-            }
-            for class_name, class_data in od_data.classes.items()
+    return ObjectDetectionInsights(
+        num_classes=len(od_analysis.classes),
+        num_images_no_label=99,
+        num_images_zero_objects=99,
+        sample_filenames_no_label=[],
+        sample_filenames_zero_objects=[],
+        class_histogram=list(class_histogram.most_common()),
+        total=ObjectInsights(
+            average_size=(99.9, 9.9),
+            object_size_abs_plot="object_size_abs_plot.png",
+            object_size_rel_plot="object_size_rel_plot.png",
+            objects_per_image_plot="objects_per_image_plot.png",
+            id="REMOVE_ME",
+            class_name="[All classes]",
+            num_objects=od_analysis.total.num_objects,
+        ),
+        classes=[
+            ObjectInsights(
+                average_size=(99.9, 9.9),
+                object_size_abs_plot="TODO",
+                object_size_rel_plot="TODO",
+                objects_per_image_plot=f"classes/class{hash(class_name)}.png",
+                id=f"class{hash(class_name)}",
+                class_name=class_name,
+                num_objects=class_data.num_objects,
+            )
+            for class_name, class_data in od_analysis.classes.items()
         ],
-    }
+    )