From 04988416d4e2bd06dd108c1a840958b68fa5ee30 Mon Sep 17 00:00:00 2001
From: Michal Szabados <michal@lightly.ai>
Date: Wed, 8 Nov 2023 15:24:03 +0100
Subject: [PATCH] Structure report data

---
 compute_prototype.py => example.py         |   8 +-
 src/lightly_insights/analyze.py            |   1 +
 src/lightly_insights/present.py            | 180 ++++++++++++++-------
 src/lightly_insights/templates/report.html |  45 +++---
 4 files changed, 147 insertions(+), 87 deletions(-)
 rename compute_prototype.py => example.py (76%)

diff --git a/compute_prototype.py b/example.py
similarity index 76%
rename from compute_prototype.py
rename to example.py
index a895504..3d09d3f 100644
--- a/compute_prototype.py
+++ b/example.py
@@ -8,7 +8,7 @@
 def main() -> None:
     # Analyze an image folder.
     image_folder = Path("/Users/michal/datasets/aquarium_predictions")
-    image_insights_data = analyze.analyze_images(image_folder=image_folder)
+    image_analysis = analyze.analyze_images(image_folder=image_folder)
 
     # Analyze object detections.
     label_folder = Path(
@@ -18,14 +18,14 @@ def main() -> None:
         input_folder=label_folder,
         images_rel_path="../../..",
     )
-    od_insights_data = analyze.analyze_object_detections(label_input=label_input)
+    od_analysis = analyze.analyze_object_detections(label_input=label_input)
 
     # Create HTML report.
     output_folder = Path("/Users/michal/tmp/lightly_insights_output")
     present.create_html_report(
         output_folder=output_folder,
-        image_data=image_insights_data,
-        od_data=od_insights_data,
+        image_analysis=image_analysis,
+        od_analysis=od_analysis,
     )
 
 
diff --git a/src/lightly_insights/analyze.py b/src/lightly_insights/analyze.py
index a2ef13a..682d300 100644
--- a/src/lightly_insights/analyze.py
+++ b/src/lightly_insights/analyze.py
@@ -46,6 +46,7 @@ def analyze_images(image_folder: Path) -> ImageAnalysis:
 
     # Param: Recursive?
     # Param: Subsample?
+    # TODO: Support all image types, not only "*.jpg".
     sorted_paths = sorted(image_folder.glob("*.jpg"))
     for image_path in sorted_paths:
         num_images += 1
diff --git a/src/lightly_insights/present.py b/src/lightly_insights/present.py
index 065134a..5d0aa2f 100644
--- a/src/lightly_insights/present.py
+++ b/src/lightly_insights/present.py
@@ -1,8 +1,9 @@
 import random
 import shutil
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Counter, Dict
+from typing import Any, Counter, Dict, List, Tuple
 
 from jinja2 import Environment, FileSystemLoader
 
@@ -12,27 +13,82 @@
 static_folder = Path(__file__).parent / "static"
 
 
+@dataclass(frozen=True)
+class SampleImage:
+    filename: str
+    path: Path
+
+
+@dataclass(frozen=True)
+class ImageInsights:
+    # Image sizes.
+    median_size: Tuple[int, int]
+    image_sizes: List[Tuple[Tuple[int, int], int]]
+    image_size_plot: str
+
+    # Sample images.
+    sample_images: List[SampleImage]
+
+
+@dataclass(frozen=True)
+class ObjectInsights:
+    # Object sizes.
+    average_size: Tuple[float, float]
+    object_size_abs_plot: str
+    object_size_rel_plot: str
+    objects_per_image_plot: str
+
+    # Hack: identification fields; the "total" entry uses placeholder values.
+    id: str
+    class_name: str
+    num_objects: int
+
+
+@dataclass(frozen=True)
+class ObjectDetectionInsights:
+    # Image filenames.
+    num_images_no_label: int
+    num_images_zero_objects: int
+    sample_filenames_no_label: List[str]
+    sample_filenames_zero_objects: List[str]
+
+    num_classes: int
+    class_histogram: List[Tuple[str, int]]
+    total: ObjectInsights
+    classes: List[ObjectInsights]
+
+
+# @dataclass(frozen=True)
+# class ReportData:
+#     image_analysis: ImageAnalysis
+#     object_detection_analysis: ObjectDetectionAnalysis
+#     date_generated: str
+#     image_insights: ImageInsights
+#     object_detection_insights: ObjectDetectionInsights
+
+
 def create_html_report(
     output_folder: Path,
-    image_data: ImageAnalysis,
-    od_data: ObjectDetectionAnalysis,
+    image_analysis: ImageAnalysis,
+    od_analysis: ObjectDetectionAnalysis,
 ) -> None:
     output_folder.mkdir(parents=True, exist_ok=True)
 
-    image_props = get_image_props(
+    image_insights = _get_image_insights(
         output_folder=output_folder,
-        image_data=image_data,
+        image_analysis=image_analysis,
     )
-    od_props = get_object_detection_props(
+    object_detection_insights = _get_object_detection_insights(
         output_folder=output_folder,
-        od_data=od_data,
+        od_analysis=od_analysis,
+    )
+    report_data = dict(
+        image_analysis=image_analysis,
+        object_detection_analysis=od_analysis,
+        date_generated=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        image_insights=image_insights,
+        object_detection_insights=object_detection_insights,
     )
-    report_props = {
-        "image": image_props,
-        "object_detection": od_props,
-        # Now.
-        "date_generated": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-    }
 
     # Setup Jinja2 environment
     env = Environment(
@@ -41,7 +97,7 @@ def create_html_report(
     template = env.get_template("report.html")
 
     # Render the template with data
-    html_output = template.render(report_props)
+    html_output = template.render(report_data)
 
     # Write the HTML to file
     html_output_path = output_folder / "report.html"
@@ -54,14 +110,14 @@ def create_html_report(
     shutil.copytree(src=static_folder, dst=output_static_folder)
 
 
-def get_image_props(
+def _get_image_insights(
     output_folder: Path,
-    image_data: ImageAnalysis,
-) -> Dict[str, Any]:
+    image_analysis: ImageAnalysis,
+) -> ImageInsights:
     # Image size plot.
     plots.width_heigth_pixels_plot(
         output_file=output_folder / "image_size_plot.png",
-        size_histogram=image_data.image_sizes,
+        size_histogram=image_analysis.image_sizes,  # type: ignore[arg-type]
         title="Image Sizes",
     )
 
@@ -70,84 +126,86 @@ def get_image_props(
     sample_images = []
     # TODO: Do this more efficiently.
     rng = random.Random(42)
-    selection = rng.sample(sorted(list(image_data.filename_set)), k=8)
+    selection = rng.sample(sorted(list(image_analysis.filename_set)), k=8)
     for filename in selection:
         shutil.copy2(
-            src=image_data.image_folder / filename, dst=sample_folder / filename
+            src=image_analysis.image_folder / filename, dst=sample_folder / filename
         )
         sample_images.append(
-            {
-                "filename": filename,
-                "path": sample_folder / filename,
-            }
+            SampleImage(filename=filename, path=Path("./sample") / filename)
         )
 
-    return {
-        "raw": image_data,
-        "num_images": image_data.num_images,
-        "image_sizes": image_data.image_sizes.most_common(),
-        "filename_sample": list(image_data.filename_set)[:10],
-        "image_size_plot": "image_size_plot.png",
-        "sample_images": sample_images,
-    }
+    return ImageInsights(
+        median_size=(9999, 999),
+        image_sizes=list(image_analysis.image_sizes.most_common()),
+        image_size_plot="image_size_plot.png",
+        sample_images=sample_images,
+    )
 
 
-def get_object_detection_props(
+def _get_object_detection_insights(
     output_folder: Path,
-    od_data: ObjectDetectionAnalysis,
-) -> Dict[str, Any]:
+    od_analysis: ObjectDetectionAnalysis,
+) -> ObjectDetectionInsights:
     plots.width_heigth_pixels_plot(
         output_file=output_folder / "object_size_abs_plot.png",
-        size_histogram=od_data.total.object_sizes_abs,
+        size_histogram=od_analysis.total.object_sizes_abs,
         title="Object Sizes in Pixels",
     )
 
     plots.width_heigth_percent_plot(
         output_file=output_folder / "object_size_rel_plot.png",
-        size_histogram=od_data.total.object_sizes_rel,
+        size_histogram=od_analysis.total.object_sizes_rel,
         title="Object Sizes in Percent",
     )
 
     plots.objects_per_image_plot(
         output_file=output_folder / "objects_per_image_plot.png",
-        objects_per_image=od_data.total.objects_per_image,
+        objects_per_image=od_analysis.total.objects_per_image,
         title="Objects per Image",
     )
 
     # Class histogram.
     class_histogram = Counter[str]()
-    for class_name, class_data in od_data.classes.items():
+    for class_name, class_data in od_analysis.classes.items():
         class_histogram[class_name] += class_data.num_objects
 
     # Class plots.
     class_plot_folder = output_folder / "classes"
     class_plot_folder.mkdir(parents=True, exist_ok=True)
-    for class_name, object_analysis in od_data.classes.items():
+    for class_name, object_analysis in od_analysis.classes.items():
         plots.objects_per_image_plot(
             output_file=class_plot_folder / f"class{hash(class_name)}.png",
             objects_per_image=object_analysis.objects_per_image,
             title="Objects per Image",
         )
 
-    return {
-        "num_images": od_data.num_images,
-        "filename_sample": list(od_data.filename_set)[:10],
-        "num_objects": od_data.total.num_objects,
-        "objects_per_image": od_data.total.objects_per_image.most_common(),
-        "object_size_abs_plot": "object_size_abs_plot.png",
-        "object_size_rel_plot": "object_size_rel_plot.png",
-        "objects_per_image_plot": "objects_per_image_plot.png",
-        "num_classes": len(od_data.classes),
-        "class_histogram": class_histogram.most_common(),
-        "classes": [
-            {
-                "id": "class"
-                + str(hash(class_name) % 1000000),  # TODO: Use a better ID.
-                "name": class_name,
-                "num_objects": class_data.num_objects,
-                "objects_per_image": class_data.objects_per_image.most_common(),
-                "objects_per_image_plot": f"classes/class{hash(class_name)}.png",
-            }
-            for class_name, class_data in od_data.classes.items()
+    return ObjectDetectionInsights(
+        num_classes=len(od_analysis.classes),
+        num_images_no_label=99,
+        num_images_zero_objects=99,
+        sample_filenames_no_label=[],
+        sample_filenames_zero_objects=[],
+        class_histogram=list(class_histogram.most_common()),
+        total=ObjectInsights(
+            average_size=(99.9, 9.9),
+            object_size_abs_plot="object_size_abs_plot.png",
+            object_size_rel_plot="object_size_rel_plot.png",
+            objects_per_image_plot="objects_per_image_plot.png",
+            id="REMOVE_ME",
+            class_name="[All classes]",
+            num_objects=od_analysis.total.num_objects,
+        ),
+        classes=[
+            ObjectInsights(
+                average_size=(99.9, 9.9),
+                object_size_abs_plot="TODO",
+                object_size_rel_plot="TODO",
+                objects_per_image_plot=f"classes/class{hash(class_name)}.png",
+                id=f"class{hash(class_name)}",
+                class_name=class_name,
+                num_objects=class_data.num_objects,
+            )
+            for class_name, class_data in od_analysis.classes.items()
         ],
-    }
+    )
diff --git a/src/lightly_insights/templates/report.html b/src/lightly_insights/templates/report.html
index 243a37b..ee344cc 100644
--- a/src/lightly_insights/templates/report.html
+++ b/src/lightly_insights/templates/report.html
@@ -9,6 +9,7 @@
   <!-- Bootstrap CSS -->
   <!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootswatch@5/dist/flatly/bootstrap.min.css"> -->
   <link rel="stylesheet" href="./static/cdn.jsdelivr.net_npm_bootswatch@5_dist_flatly_bootstrap.min.css">
+  <link rel="icon" type="image/x-icon" href="./static/favicon.png">
   <title>Lightly Insights</title>
 
   <style>
@@ -60,7 +61,7 @@ <h1 class="mb-4">Lightly Insights</h1>
         <tbody>
           <tr>
             <th scope="row">Image Folder</th>
-            <td>{{ image.raw.image_folder }}</td>
+            <td>{{ image_analysis.image_folder }}</td>
           </tr>
           <tr>
             <th scope="row">Date Generated</th>
@@ -80,7 +81,7 @@ <h1 class="mb-4">Lightly Insights</h1>
           <div class="card text-white bg-primary">
             <!-- <div class="card-header">Header</div> -->
             <div class="card-body">
-              <h3 class="card-titl mb-3">{{ image.raw.num_images }} Images</h4>
+              <h3 class="card-title mb-3">{{ image_analysis.num_images }} Images</h3>
 
                 <table class="table table-hover">
                   <tbody>
@@ -97,8 +98,8 @@ <h3 class="card-titl mb-3">{{ image.raw.num_images }} Images</h4>
                       <td>12</td>
                     </tr>
                     <tr class="table-primary">
-                      <th scope="row">Aveg objects per image</th>
-                      <td>{{ "{0:.1f}".format(object_detection.num_objects / image.raw.num_images) }}</td>
+                      <th scope="row">Avg objects per image</th>
+                      <td>{{ "{0:.1f}".format(object_detection_analysis.total.num_objects / image_analysis.num_images) }}</td>
                     </tr>
                   </tbody>
                 </table>
@@ -110,21 +111,21 @@ <h3 class="card-titl mb-3">{{ image.raw.num_images }} Images</h4>
           <div class="card text-white bg-secondary">
             <!-- <div class="card-header">Header</div> -->
             <div class="card-body">
-              <h3 class="card-title mb-3">{{ object_detection.num_objects }} Objects</h4>
+              <h3 class="card-title mb-3">{{ object_detection_analysis.total.num_objects }} Objects</h3>
 
                 <table class="table table-hover">
                   <tbody>
                     <tr class="table-secondary">
                       <th scope="row">Num classes</th>
-                      <td>{{ object_detection.num_classes }}</td>
+                      <td>{{ object_detection_insights.num_classes }}</td>
                     </tr>
                     <tr class="table-secondary">
                       <th scope="row">Avg objects per class</th>
-                      <td>{{ "{0:.1f}".format(object_detection.num_objects / object_detection.num_classes) }}</td>
+                      <td>{{ "{0:.1f}".format(object_detection_analysis.total.num_objects / object_detection_insights.num_classes) }}</td>
                     </tr>
                     <tr class="table-secondary">
                       <th scope="row">Avg images per class</th>
-                      <td>{{ "{0:.1f}".format(image.raw.num_images / object_detection.num_classes) }}</td>
+                      <td>{{ "{0:.1f}".format(image_analysis.num_images / object_detection_insights.num_classes) }}</td>
                     </tr>
                     <tr class="table-secondary">
                       <th scope="row">Avg size</th>
@@ -152,7 +153,7 @@ <h3 class="card-title mb-3">{{ object_detection.num_objects }} Objects</h4>
       <h3>Sample Images</h3>
 
       <div class="row">
-        {% for sample_image in image.sample_images %}
+        {% for sample_image in image_insights.sample_images %}
         <!-- Four equal columns -->
         <div class="col-6 col-md-3 g-3">
           <div class="square-box">
@@ -175,7 +176,7 @@ <h4 class="card-header">Image Size Insights</h4>
           <div class="row">
 
             <div class="col-lg-6">
-              <img src="{{ image.image_size_plot }}" class="img-fluid" alt="Image Size Plot">
+              <img src="{{ image_insights.image_size_plot }}" class="img-fluid" alt="Image Size Plot">
             </div>
 
             <div class="col-lg-6">
@@ -188,7 +189,7 @@ <h4 class="card-header">Image Size Insights</h4>
                   </tr>
                 </thead>
                 <tbody>
-                  {% for image_size, num_images in image.image_sizes %}
+                  {% for image_size, num_images in image_insights.image_sizes %}
                   <tr>
                     <td>{{ image_size[0] }} × {{ image_size[1] }}</td>
                     <td>{{ num_images }}</td>
@@ -212,20 +213,20 @@ <h4 class="card-header">Image Size Insights</h4>
       <div class="card border-primary">
         <h4 class="card-header">Object Detection Insights</h4>
         <div class="card-body">
-          <h4 class="card-title">{{ object_detection.num_objects }} Objects</h4>
+          <h4 class="card-title">{{ object_detection_analysis.total.num_objects }} Objects</h4>
 
           <div class="row">
             <div class="col-lg-6">
-              <img src="{{ object_detection.object_size_abs_plot }}" class="img-fluid" alt="Object Size in Pixels Plot">
+              <img src="{{ object_detection_insights.total.object_size_abs_plot }}" class="img-fluid" alt="Object Size in Pixels Plot">
             </div>
             <div class="col-lg-6">
-              <img src="{{ object_detection.object_size_rel_plot }}" class="img-fluid"
+              <img src="{{ object_detection_insights.total.object_size_rel_plot }}" class="img-fluid"
                 alt="Object Size in Percent Plot">
             </div>
           </div>
 
           <div class="col-lg-6">
-            <img src="{{ object_detection.objects_per_image_plot }}" class="img-fluid" alt="Objects per Image">
+            <img src="{{ object_detection_insights.total.objects_per_image_plot }}" class="img-fluid" alt="Objects per Image">
           </div>
 
           <p class="card-text">Some quick example text to build on the card title and make up the bulk of the card's
@@ -240,14 +241,14 @@ <h4 class="card-title">{{ object_detection.num_objects }} Objects</h4>
               </tr>
             </thead>
             <tbody>
-              {% for class, num_objects in object_detection.class_histogram %}
+              {% for class, num_objects in object_detection_insights.class_histogram %}
               <tr>
                 <td class="col-3">{{ class }}</td>
                 <td class="col-1">{{ num_objects }}</td>
                 <td class="col-8">
                   <div class="progress mt-1">
                     <div class="progress-bar" role="progressbar"
-                      style="width: {{ num_objects / object_detection.num_objects * 100 }}%;" aria-valuenow="25"
+                      style="width: {{ num_objects / object_detection_analysis.total.num_objects * 100 }}%;" aria-valuenow="25"
                       aria-valuemin="0" aria-valuemax="100"></div>
                   </div>
                 </td>
@@ -266,16 +267,16 @@ <h4 class="card-title">{{ object_detection.num_objects }} Objects</h4>
         <h4 class="card-header">Class Insights</h4>
         <div class="card-body">
 
-          <h4 class="card-title">{{ object_detection.num_classes }} Classes</h4>
+          <h4 class="card-title">{{ object_detection_insights.num_classes }} Classes</h4>
           <p>Choose a class:</p>
 
           <!-- Nav pills -->
           <ul class="nav nav-pills mb-4">
-            {% for class in object_detection.classes %}
+            {% for class in object_detection_insights.classes %}
             <li class="nav-item" role="presentation">
               <a class="btn btn-outline-primary btn-sm m-1" data-bs-toggle="tab" data-bs-target="#{{ class.id }}"
                 href="#{{ class.id }}">
-                {{ class.name }}&nbsp;&nbsp;<span class="badge bg-light rounded-pill">{{ class.num_objects }}</span>
+                {{ class.class_name }}&nbsp;&nbsp;<span class="badge bg-light rounded-pill">{{ class.num_objects }}</span>
               </a>
             </li>
             {% endfor %}
@@ -285,9 +286,9 @@ <h4 class="card-title">{{ object_detection.num_classes }} Classes</h4>
           <!-- Tab panes -->
           <div class="tab-content" id="myTabContent">
 
-            {% for class in object_detection.classes %}
+            {% for class in object_detection_insights.classes %}
             <div class="tab-pane fade" id="{{ class.id }}" role="tabpanel">
-              <h4>{{ class.num_objects }} Objects of Class "{{ class.name }}"</h4>
+              <h4>{{ class.num_objects }} Objects of Class "{{ class.class_name }}"</h4>
 
               <img src="{{ class.objects_per_image_plot }}" class="img-fluid" alt="Objects per Image">
             </div>