diff --git a/example.py b/example.py index b8a5978..aba0cec 100644 --- a/example.py +++ b/example.py @@ -1,31 +1,32 @@ +# To run this example please download the PascalVOC 2007 dataset first: +# +# wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar +# tar -xvf VOCtrainval_06-Nov-2007.tar + from pathlib import Path -from labelformat.formats import YOLOv8ObjectDetectionInput +from labelformat.formats import PascalVOCObjectDetectionInput from lightly_insights import analyze, present - -def main() -> None: - # Analyze an image folder. - image_folder = Path("/Users/michal/datasets/aquarium.v2-release.yolov8/test/images") - image_analysis = analyze.analyze_images(image_folder=image_folder) - - # Analyze object detections. - input_file = Path("/Users/michal/datasets/aquarium.v2-release.yolov8/data.yaml") - label_input = YOLOv8ObjectDetectionInput( - input_file=input_file, - input_split="test", - ) - od_analysis = analyze.analyze_object_detections(label_input=label_input) - - # Create HTML report. - output_folder = Path("/Users/michal/tmp/lightly_insights_output") - present.create_html_report( - output_folder=output_folder, - image_analysis=image_analysis, - od_analysis=od_analysis, - ) - - -if __name__ == "__main__": - main() +# Analyze an image folder. +image_analysis = analyze.analyze_images( + image_folder=Path("./VOCdevkit/VOC2007/JPEGImages") +) + +# Analyze object detections. +label_input = PascalVOCObjectDetectionInput( + input_folder=Path("./VOCdevkit/VOC2007/Annotations"), + category_names=( + "person,bird,cat,cow,dog,horse,sheep,aeroplane,bicycle,boat,bus,car," + "motorbike,train,bottle,chair,diningtable,pottedplant,sofa,tvmonitor" + ), +) +od_analysis = analyze.analyze_object_detections(label_input=label_input) + +# Create HTML report. 
+present.create_html_report( + output_folder=Path("./html_report"), + image_analysis=image_analysis, + od_analysis=od_analysis, +) diff --git a/example_2.py b/example_2.py deleted file mode 100644 index 75a3739..0000000 --- a/example_2.py +++ /dev/null @@ -1,31 +0,0 @@ -from pathlib import Path - -from labelformat.formats import PascalVOCObjectDetectionInput - -from lightly_insights import analyze, present - - -def main() -> None: - # Analyze an image folder. - image_folder = Path("/Users/michal/datasets/VOC2007/JPEGImages") - image_analysis = analyze.analyze_images(image_folder=image_folder) - - # Analyze object detections. - label_folder = Path("/Users/michal/datasets/VOC2007/Annotations") - label_input = PascalVOCObjectDetectionInput( - input_folder=label_folder, - category_names="person,bird,cat,cow,dog,horse,sheep,aeroplane,bicycle,boat,bus,car,motorbike,train,bottle,chair,diningtable,pottedplant,sofa,tvmonitor", - ) - od_analysis = analyze.analyze_object_detections(label_input=label_input) - - # Create HTML report. - output_folder = Path("/Users/michal/tmp/lightly_insights_output_2") - present.create_html_report( - output_folder=output_folder, - image_analysis=image_analysis, - od_analysis=od_analysis, - ) - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml index d32f788..dcd9e89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api" name = "lightly-insights" version = "0.1.0" authors = ["Lightly.ai"] -description = "Easily get basic insights about your ML dataset." +description = "Get quick insights about your ML dataset." 
readme = "README.md" license = "MIT" @@ -29,9 +29,6 @@ build = "*" twine = "*" types-Pillow = "*" -# [tool.poetry.scripts] -# labelformat = "lightly_insights.cli.cli:main" - [tool.pytest.ini_options] pythonpath = [ ".", "src/" diff --git a/src/lightly_insights/analyze.py b/src/lightly_insights/analyze.py index 0981860..f353f16 100644 --- a/src/lightly_insights/analyze.py +++ b/src/lightly_insights/analyze.py @@ -188,6 +188,7 @@ def analyze_object_detections( total_data.heatmap[int(y1) : int(y2), int(x1) : int(x2)] += 1 class_datum.heatmap[int(y1) : int(y2), int(x1) : int(x2)] += 1 + # Sample images. if len(class_datum.sample_filenames) < 4: class_datum.sample_filenames.append(label.image.filename) diff --git a/src/lightly_insights/main.py b/src/lightly_insights/main.py deleted file mode 100644 index b06117d..0000000 --- a/src/lightly_insights/main.py +++ /dev/null @@ -1,2 +0,0 @@ -def main() -> None: - print("Hello, world!") diff --git a/src/lightly_insights/plots.py b/src/lightly_insights/plots.py index c4ca369..61cc2a4 100644 --- a/src/lightly_insights/plots.py +++ b/src/lightly_insights/plots.py @@ -37,17 +37,6 @@ def create_object_plots( objects_per_image_path = plot_folder / "objects_per_image.png" heatmap_path = plot_folder / "heatmap.png" - # TODO: Remove. - if plot_folder.name != "plots": - return PlotPaths( - object_sizes_abs=str(object_sizes_abs_path.relative_to(output_folder)), - object_sizes_rel=str(object_sizes_rel_path.relative_to(output_folder)), - side_length_avg=str(side_length_avg_path.relative_to(output_folder)), - rel_area=str(rel_area_path.relative_to(output_folder)), - objects_per_image=str(objects_per_image_path.relative_to(output_folder)), - heatmap=str(heatmap_path.relative_to(output_folder)), - ) - # Bucket by multiples of 20px. 
size_histogram_abs = Counter( [ @@ -131,7 +120,7 @@ def create_object_plots( def width_heigth_pixels_plot( output_file: Path, - size_histogram: Counter[Tuple[float, float]], + size_histogram: Union[Counter[Tuple[float, float]], Counter[Tuple[int, int]]], title: str, ) -> None: # Image size plot. diff --git a/src/lightly_insights/present.py b/src/lightly_insights/present.py index b46a409..e8562dd 100644 --- a/src/lightly_insights/present.py +++ b/src/lightly_insights/present.py @@ -15,6 +15,7 @@ logger = logging.getLogger(__name__) static_folder = Path(__file__).parent / "static" +template_folder = Path(__file__).parent / "templates" @dataclass(frozen=True) @@ -81,7 +82,7 @@ def create_html_report( # Setup Jinja2 environment env = Environment( - loader=FileSystemLoader(searchpath="./src/lightly_insights/templates"), + loader=FileSystemLoader(searchpath=template_folder), undefined=StrictUndefined, ) template = env.get_template("report.html") @@ -90,7 +91,7 @@ def create_html_report( html_output = template.render(report_data) # Write the HTML to file - html_output_path = output_folder / "report.html" + html_output_path = output_folder / "index.html" html_output_path.write_text(html_output) # Copy static files. @@ -99,6 +100,8 @@ def create_html_report( shutil.rmtree(output_static_folder, ignore_errors=True) shutil.copytree(src=static_folder, dst=output_static_folder) + logger.info(f"Successfully created HTML report: {html_output_path}") + def _get_image_insights( output_folder: Path, @@ -107,14 +110,14 @@ def _get_image_insights( # Image size plot. plots.width_heigth_pixels_plot( output_file=output_folder / "image_size_plot.png", - size_histogram=image_analysis.image_sizes, # type: ignore[arg-type] + size_histogram=image_analysis.image_sizes, title="Image Sizes", ) + # Sample images. sample_folder = output_folder / "sample" sample_folder.mkdir(parents=True, exist_ok=True) sample_images = [] - # TODO: Do this more efficiently. 
rng = random.Random(42) selection = rng.sample(sorted(list(image_analysis.filename_set)), k=8) for filename in selection: diff --git a/tests/test_main.py b/tests/test_main.py index 9ed0516..629ccd5 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,3 @@ -from lightly_insights import main - - -def test_main() -> None: - main.main() +def test() -> None: + # Testing infrastructure is set up! assert True