rebase cleaned for utilities, visualize and preprocessing

weecology · Mar 3, 2024 · 8903652 · 8903652
1 parent affd330
commit 8903652
Show file tree

Hide file tree

Showing 7 changed files with 46 additions and 61 deletions.
diff --git a/deepforest/evaluate.py b/deepforest/evaluate.py
@@ -9,16 +9,17 @@
 from PIL import Image
 
 from deepforest import IoU
-from deepforest.utilities import check_file
 from deepforest import visualize
+from deepforest.utilities import determine_geometry_type
+
 import warnings
 
 
-def evaluate_image(predictions, ground_df, root_dir, savedir=None):
+def evaluate_image_boxes(predictions, ground_df, root_dir, savedir=None):
     """
     Compute intersection-over-union matching among prediction and ground truth boxes for one image
     Args:
-        df: a pandas dataframe with columns name, xmin, xmax, ymin, ymax, label. The 'name' column should be the path relative to the location of the file.
+        df: a geopandas dataframe with geometry columns
         summarize: Whether to group statistics by plot and overall score
         image_coordinates: Whether the current boxes are in coordinate system of the image, e.g. origin (0,0) upper left.
         root_dir: Where to search for image names in df
@@ -32,14 +33,6 @@ def evaluate_image(predictions, ground_df, root_dir, savedir=None):
     else:
         plot_name = plot_names[0]
 
-    predictions['geometry'] = predictions.apply(
-        lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
-    predictions = gpd.GeoDataFrame(predictions, geometry='geometry')
-
-    ground_df['geometry'] = ground_df.apply(
-        lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
-    ground_df = gpd.GeoDataFrame(ground_df, geometry='geometry')
-
     # match
     result = IoU.compute_IoU(ground_df, predictions)
 
@@ -107,14 +100,17 @@ def __evaluate_wrapper__(predictions,
         Returns:
             results: a dictionary of results with keys, results, box_recall, box_precision, class_recall
         """
-    # remove empty samples from ground truth
-    ground_df = ground_df[~((ground_df.xmin == 0) & (ground_df.xmax == 0))]
-
-    results = evaluate(predictions=predictions,
-                       ground_df=ground_df,
-                       root_dir=root_dir,
-                       iou_threshold=iou_threshold,
-                       savedir=savedir)
+    prediction_geometry = determine_geometry_type(predictions)
+    if prediction_geometry == "point":
+        raise NotImplementedError("Point evaluation is not yet implemented")
+    elif prediction_geometry == "box":
+        results = evaluate_boxes(predictions=predictions,
+                        ground_df=ground_df,
+                        root_dir=root_dir,
+                        iou_threshold=iou_threshold,
+                        savedir=savedir)
+    else:
+        raise NotImplementedError("Geometry type {} not implemented".format(prediction_geometry))
 
     # replace classes if not NUll
     if not results is None:
@@ -130,7 +126,7 @@ def __evaluate_wrapper__(predictions,
     return results
 
 
-def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
+def evaluate_boxes(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
     """Image annotated crown evaluation routine
     submission can be submitted as a .shp, existing pandas dataframe or .csv path
 
@@ -145,10 +141,6 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
         box_precision: proportion of predictions that are true positive, regardless of class
         class_recall: a pandas dataframe of class level recall and precision with class sizes
     """
-
-    check_file(ground_df)
-    check_file(predictions)
-
     # Run evaluation on all plots
     results = []
     box_recalls = []
@@ -175,7 +167,7 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
             continue
         else:
             group = group.reset_index(drop=True)
-            result = evaluate_image(predictions=image_predictions,
+            result = evaluate_image_boxes(predictions=image_predictions,
                                     ground_df=group,
                                     root_dir=root_dir,
                                     savedir=savedir)

diff --git a/deepforest/main.py b/deepforest/main.py
@@ -395,7 +395,7 @@ def predict_file(self, csv_file, root_dir, savedir=None, color=None, thickness=1
         Returns:
             df: pandas dataframe with bounding boxes, label and scores for each image in the csv file
         """
-        df = utilities.pandas_to_geopandas(csv_file)
+        df = utilities.read_file(csv_file)
         ds = dataset.TreeDataset(csv_file=csv_file,
                                  root_dir=root_dir,
                                  transforms=None,
@@ -411,6 +411,8 @@ def predict_file(self, csv_file, root_dir, savedir=None, color=None, thickness=1
                                                savedir=savedir,
                                                color=color,
                                                thickness=thickness)
+
+
 
         return results
 
@@ -579,7 +581,7 @@ def on_validation_epoch_end(self):
 
         # Evaluate on validation data predictions
         self.predictions_df = pd.concat(self.predictions)
-        ground_df = utilities.pandas_to_geopandas(self.config["validation"]["csv_file"])
+        ground_df = utilities.read_file(self.config["validation"]["csv_file"])
         ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
 
         #Evaluate every n epochs
@@ -647,7 +649,7 @@ def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
         Returns:
             results: dict of ("results", "precision", "recall") for a given threshold
         """
-        ground_df = utilities.pandas_to_geopandas(csv_file)
+        ground_df = utilities.read_file(csv_file)
         ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
         predictions = self.predict_file(csv_file=csv_file,
                                         root_dir=root_dir,

diff --git a/deepforest/predict.py b/deepforest/predict.py
@@ -11,6 +11,7 @@
 import typing
 
 from deepforest import visualize
+from deepforest.utilities import read_file
 
 
 def _predict_image_(model,
@@ -179,6 +180,7 @@ def _dataloader_wrapper_(model,
         results.append(prediction)
 
     results = pd.concat(results, ignore_index=True)
+    results = read_file(results, root_dir)
 
     if savedir:
         visualize.plot_prediction_dataframe(results,

diff --git a/deepforest/preprocess.py b/deepforest/preprocess.py
@@ -13,7 +13,6 @@
 import warnings
 import rasterio
 import geopandas as gpd
-from deepforest.utilities import pandas_to_geopandas
 from shapely import geometry
 import geopandas as gpd
 from deepforest.utilities import read_file
@@ -245,24 +244,28 @@ def split_raster(annotations_file=None,
             continue
 
         # Find annotations, image_name is the basename of the path
-        crop_annotations = select_annotations(image_annotations, window = windows[index])
-
-        if crop_annotations.empty:
-            if allow_empty:
-                crop_annotations.loc[0, "image_path"] = "{}_{}.png".format(image_basename, index)
-            else:
-                continue
-        else:
+        if annotations_file is not None:
+            crop_annotations = select_annotations(image_annotations, window = windows[index])
             crop_annotations["image_path"] = "{}_{}.png".format(image_basename, index)
-
-        annotations_files.append(crop_annotations)
-        save_crop(save_dir, image_name, index, crop)
+            if crop_annotations.empty:
+                if allow_empty:
+                    crop_annotations.loc[0, "image_path"] = "{}_{}.png".format(image_basename, index)
+                else:
+                    continue
+            annotations_files.append(crop_annotations)
+
+        # Save image crop
+        if allow_empty or crop_annotations is not None:
+            crop_filename = save_crop(save_dir, image_name, index, crop)
+            crop_filenames.append(crop_filename)
 
-    if len(annotations_files) == 0:
+    if annotations_file is None:
+        return crop_filenames
+    elif len(annotations_files) == 0:
         raise ValueError(
             "Input file has no overlapping annotations and allow_empty is {}".format(
                 allow_empty))
-
+    else:
         annotations_files = pd.concat(annotations_files)
 
         # Checkpoint csv files, useful for parallelization
@@ -271,6 +274,4 @@ def split_raster(annotations_file=None,
         file_path = os.path.join(save_dir, f"{image_basename}.csv")
         annotations_files.to_csv(file_path, index=False, header=True)
 
-        return annotations_files
-    else:
-        return crop_filenames
+        return annotations_files
diff --git a/deepforest/utilities.py b/deepforest/utilities.py
@@ -374,7 +374,7 @@ def determine_geometry_type(df, verbose=True):
     return geometry_type
 
 def read_file(input, root_dir=None):
-    """Read a file and return a geopandas dataframe
+    """Read a file and return a geopandas dataframe. This is the main entry point for reading annotations into deepforest.
     Args:
         input: a path to a file or a pandas dataframe
         root_dir: Optional directory to prepend to the image_path column
@@ -546,18 +546,6 @@ def round_with_floats(x):
     return result
 
 
-def check_file(df):
-    """Check a file format for correct column names and structure"""
-
-    if not all(x in df.columns
-               for x in ["image_path", "xmin", "xmax", "ymin", "ymax", "label"]):
-        raise IOError("Input file has incorrect column names, "
-                      "the following columns must exist "
-                      "'image_path','xmin','ymin','xmax','ymax','label'.")
-
-    return df
-
-
 def check_image(image):
     """Check an image is three channel, channel last format
         Args:
@@ -574,7 +562,7 @@ def image_to_geo_coordinates(gdf, root_dir, flip_y_axis=False):
     Convert from image coordinates to geographic coordinates
     Note that this assumes df is just a single plot being passed to this function
     Args:
-        gdf: a geodataframe, see pandas_to_geopandas
+        gdf: a geodataframe
         root_dir: directory of images to lookup image_path column
     Returns:
         df: a geospatial dataframe with the boxes optionally transformed to the target crs

diff --git a/deepforest/visualize.py b/deepforest/visualize.py
@@ -142,7 +142,7 @@ def plot_predictions(image, df, color=None, thickness=1):
                 cv2.polylines(image, polygon, True, color, thickness=thickness)
             elif geometry_type == "Point":
                 int_coords = lambda x: np.array(x).round().astype(np.int32)
-                cv2.circle(image, int_coords(row["geometry"]), color=color, radius=5, thickness=thickness)
+                cv2.circle(image, (int_coords(row["geometry"].x), int_coords(row["geometry"].y)), color=color, radius=5, thickness=thickness)
             else:
                 raise ValueError("Only polygons and points are supported")
         elif "xmin" in df.columns:

diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
@@ -215,7 +215,7 @@ def test_split_raster_with_polygon_annotations(tmpdir, config):
     sample_geometry = [geometry.Polygon([(0, 0), (0, 2), (1, 1), (1, 0), (0, 0)]), geometry.Polygon([(2, 2), (2, 4), (3, 3), (3, 2), (2, 2)])]
     annotations = pd.DataFrame({
         "image_path": ["OSBS_029.tif", "OSBS_029.tif"],
-        "polygon": [sample_geometry[0].to_wkt(), sample_geometry[1].to_wkt()],
+        "polygon": [sample_geometry[0].wkt, sample_geometry[1].wkt],
         "label": ["Tree", "Tree"]
     })
     annotations_file = tmpdir.join("polygon_annotations.csv")