Skip to content

Commit

Permalink
rebase cleaned for utilities, visualize and preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Mar 3, 2024
1 parent affd330 commit 8903652
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 61 deletions.
42 changes: 17 additions & 25 deletions deepforest/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@
from PIL import Image

from deepforest import IoU
from deepforest.utilities import check_file
from deepforest import visualize
from deepforest.utilities import determine_geometry_type

import warnings


def evaluate_image(predictions, ground_df, root_dir, savedir=None):
def evaluate_image_boxes(predictions, ground_df, root_dir, savedir=None):
"""
Compute intersection-over-union matching among prediction and ground truth boxes for one image
Args:
df: a pandas dataframe with columns name, xmin, xmax, ymin, ymax, label. The 'name' column should be the path relative to the location of the file.
df: a geopandas dataframe with geometry columns
summarize: Whether to group statistics by plot and overall score
image_coordinates: Whether the current boxes are in coordinate system of the image, e.g. origin (0,0) upper left.
root_dir: Where to search for image names in df
Expand All @@ -32,14 +33,6 @@ def evaluate_image(predictions, ground_df, root_dir, savedir=None):
else:
plot_name = plot_names[0]

predictions['geometry'] = predictions.apply(
lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
predictions = gpd.GeoDataFrame(predictions, geometry='geometry')

ground_df['geometry'] = ground_df.apply(
lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
ground_df = gpd.GeoDataFrame(ground_df, geometry='geometry')

# match
result = IoU.compute_IoU(ground_df, predictions)

Expand Down Expand Up @@ -107,14 +100,17 @@ def __evaluate_wrapper__(predictions,
Returns:
results: a dictionary of results with keys, results, box_recall, box_precision, class_recall
"""
# remove empty samples from ground truth
ground_df = ground_df[~((ground_df.xmin == 0) & (ground_df.xmax == 0))]

results = evaluate(predictions=predictions,
ground_df=ground_df,
root_dir=root_dir,
iou_threshold=iou_threshold,
savedir=savedir)
prediction_geometry = determine_geometry_type(predictions)
if prediction_geometry == "point":
raise NotImplementedError("Point evaluation is not yet implemented")
elif prediction_geometry == "box":
results = evaluate_boxes(predictions=predictions,
ground_df=ground_df,
root_dir=root_dir,
iou_threshold=iou_threshold,
savedir=savedir)
else:
raise NotImplementedError("Geometry type {} not implemented".format(prediction_geometry))

# replace classes if results is not None
if not results is None:
Expand All @@ -130,7 +126,7 @@ def __evaluate_wrapper__(predictions,
return results


def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
def evaluate_boxes(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
"""Image annotated crown evaluation routine
submission can be submitted as a .shp, existing pandas dataframe or .csv path
Expand All @@ -145,10 +141,6 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
box_precision: proportion of predictions that are true positive, regardless of class
class_recall: a pandas dataframe of class level recall and precision with class sizes
"""

check_file(ground_df)
check_file(predictions)

# Run evaluation on all plots
results = []
box_recalls = []
Expand All @@ -175,7 +167,7 @@ def evaluate(predictions, ground_df, root_dir, iou_threshold=0.4, savedir=None):
continue
else:
group = group.reset_index(drop=True)
result = evaluate_image(predictions=image_predictions,
result = evaluate_image_boxes(predictions=image_predictions,
ground_df=group,
root_dir=root_dir,
savedir=savedir)
Expand Down
8 changes: 5 additions & 3 deletions deepforest/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def predict_file(self, csv_file, root_dir, savedir=None, color=None, thickness=1
Returns:
df: pandas dataframe with bounding boxes, label and scores for each image in the csv file
"""
df = utilities.pandas_to_geopandas(csv_file)
df = utilities.read_file(csv_file)
ds = dataset.TreeDataset(csv_file=csv_file,
root_dir=root_dir,
transforms=None,
Expand All @@ -411,6 +411,8 @@ def predict_file(self, csv_file, root_dir, savedir=None, color=None, thickness=1
savedir=savedir,
color=color,
thickness=thickness)



return results

Expand Down Expand Up @@ -579,7 +581,7 @@ def on_validation_epoch_end(self):

# Evaluate on validation data predictions
self.predictions_df = pd.concat(self.predictions)
ground_df = utilities.pandas_to_geopandas(self.config["validation"]["csv_file"])
ground_df = utilities.read_file(self.config["validation"]["csv_file"])
ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])

#Evaluate every n epochs
Expand Down Expand Up @@ -647,7 +649,7 @@ def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
Returns:
results: dict of ("results", "precision", "recall") for a given threshold
"""
ground_df = utilities.pandas_to_geopandas(csv_file)
ground_df = utilities.read_file(csv_file)
ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
predictions = self.predict_file(csv_file=csv_file,
root_dir=root_dir,
Expand Down
2 changes: 2 additions & 0 deletions deepforest/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import typing

from deepforest import visualize
from deepforest.utilities import read_file


def _predict_image_(model,
Expand Down Expand Up @@ -179,6 +180,7 @@ def _dataloader_wrapper_(model,
results.append(prediction)

results = pd.concat(results, ignore_index=True)
results = read_file(results, root_dir)

if savedir:
visualize.plot_prediction_dataframe(results,
Expand Down
35 changes: 18 additions & 17 deletions deepforest/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import warnings
import rasterio
import geopandas as gpd
from deepforest.utilities import pandas_to_geopandas
from shapely import geometry
import geopandas as gpd
from deepforest.utilities import read_file
Expand Down Expand Up @@ -245,24 +244,28 @@ def split_raster(annotations_file=None,
continue

# Find annotations, image_name is the basename of the path
crop_annotations = select_annotations(image_annotations, window = windows[index])

if crop_annotations.empty:
if allow_empty:
crop_annotations.loc[0, "image_path"] = "{}_{}.png".format(image_basename, index)
else:
continue
else:
if annotations_file is not None:
crop_annotations = select_annotations(image_annotations, window = windows[index])
crop_annotations["image_path"] = "{}_{}.png".format(image_basename, index)

annotations_files.append(crop_annotations)
save_crop(save_dir, image_name, index, crop)
if crop_annotations.empty:
if allow_empty:
crop_annotations.loc[0, "image_path"] = "{}_{}.png".format(image_basename, index)
else:
continue
annotations_files.append(crop_annotations)

# Save image crop
if allow_empty or crop_annotations is not None:
crop_filename = save_crop(save_dir, image_name, index, crop)
crop_filenames.append(crop_filename)

if len(annotations_files) == 0:
if annotations_file is None:
return crop_filenames
elif len(annotations_files) == 0:
raise ValueError(
"Input file has no overlapping annotations and allow_empty is {}".format(
allow_empty))

else:
annotations_files = pd.concat(annotations_files)

# Checkpoint csv files, useful for parallelization
Expand All @@ -271,6 +274,4 @@ def split_raster(annotations_file=None,
file_path = os.path.join(save_dir, f"{image_basename}.csv")
annotations_files.to_csv(file_path, index=False, header=True)

return annotations_files
else:
return crop_filenames
return annotations_files
16 changes: 2 additions & 14 deletions deepforest/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def determine_geometry_type(df, verbose=True):
return geometry_type

def read_file(input, root_dir=None):
"""Read a file and return a geopandas dataframe
"""Read a file and return a geopandas dataframe. This is the main entry point for reading annotations into deepforest.
Args:
input: a path to a file or a pandas dataframe
root_dir: Optional directory to prepend to the image_path column
Expand Down Expand Up @@ -546,18 +546,6 @@ def round_with_floats(x):
return result


def check_file(df):
    """Check a dataframe for the column names required of box annotations.

    Args:
        df: an object exposing a ``columns`` attribute (e.g. a pandas
            dataframe) expected to contain 'image_path', 'xmin', 'xmax',
            'ymin', 'ymax' and 'label'.

    Returns:
        df: the input object, unchanged, when every required column exists.

    Raises:
        IOError: if one or more required columns are absent. The message
            lists the full set of required columns and names the ones
            that are missing.
    """
    required = ["image_path", "xmin", "xmax", "ymin", "ymax", "label"]

    # Collect the absent columns so the error can say exactly what to fix,
    # rather than only restating the full requirement.
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise IOError("Input file has incorrect column names, "
                      "the following columns must exist "
                      "'image_path','xmin','ymin','xmax','ymax','label'. "
                      "Missing columns: {}".format(missing))

    return df


def check_image(image):
"""Check an image is three channel, channel last format
Args:
Expand All @@ -574,7 +562,7 @@ def image_to_geo_coordinates(gdf, root_dir, flip_y_axis=False):
Convert from image coordinates to geographic coordinates
Note that this assumes df is just a single plot being passed to this function
Args:
gdf: a geodataframe, see pandas_to_geopandas
gdf: a geodataframe
root_dir: directory of images to lookup image_path column
Returns:
df: a geospatial dataframe with the boxes optionally transformed to the target crs
Expand Down
2 changes: 1 addition & 1 deletion deepforest/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def plot_predictions(image, df, color=None, thickness=1):
cv2.polylines(image, polygon, True, color, thickness=thickness)
elif geometry_type == "Point":
int_coords = lambda x: np.array(x).round().astype(np.int32)
cv2.circle(image, int_coords(row["geometry"]), color=color, radius=5, thickness=thickness)
cv2.circle(image, (int_coords(row["geometry"].x), int_coords(row["geometry"].y)), color=color, radius=5, thickness=thickness)
else:
raise ValueError("Only polygons and points are supported")
elif "xmin" in df.columns:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def test_split_raster_with_polygon_annotations(tmpdir, config):
sample_geometry = [geometry.Polygon([(0, 0), (0, 2), (1, 1), (1, 0), (0, 0)]), geometry.Polygon([(2, 2), (2, 4), (3, 3), (3, 2), (2, 2)])]
annotations = pd.DataFrame({
"image_path": ["OSBS_029.tif", "OSBS_029.tif"],
"polygon": [sample_geometry[0].to_wkt(), sample_geometry[1].to_wkt()],
"polygon": [sample_geometry[0].wkt, sample_geometry[1].wkt],
"label": ["Tree", "Tree"]
})
annotations_file = tmpdir.join("polygon_annotations.csv")
Expand Down

0 comments on commit 8903652

Please sign in to comment.