
Commit 2b28b83

Merge pull request #5091 from voxel51/summary-field-tweaks
minhtuev authored Nov 12, 2024
2 parents 1eee340 + 1e12882 commit 2b28b83
Showing 2 changed files with 120 additions and 30 deletions.
44 changes: 41 additions & 3 deletions fiftyone/core/dataset.py
@@ -1673,6 +1673,18 @@ def list_summary_fields(self):
self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY)
)

def _get_summarized_fields_map(self):
schema = self.get_field_schema(flat=True, info_keys=_SUMMARY_FIELD_KEY)

summarized_fields = {}
for path, field in schema.items():
summary_info = field.info[_SUMMARY_FIELD_KEY]
source_path = summary_info.get("path", None)
if source_path is not None:
summarized_fields[source_path] = path

return summarized_fields
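
A minimal sketch of how this helper is consumed (the dataset and field names below are illustrative, not part of this commit):

```python
import fiftyone as fo

dataset = fo.load_dataset("my-dataset")  # hypothetical dataset name

# Suppose a summary was previously created for a label path
dataset.create_summary_field("ground_truth.detections.label")

# The helper maps source path -> summary field name,
# e.g. {"ground_truth.detections.label": "ground_truth_label"}
summarized = dataset._get_summarized_fields_map()

# The builtin create_summary_field operator uses this map to hide paths
# that already have summaries
candidates = [
    p for p in dataset.get_field_schema(flat=True) if p not in summarized
]
```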

def create_summary_field(
self,
path,
@@ -1750,13 +1762,25 @@ def create_summary_field(
"""
_field = self.get_field(path)

if isinstance(_field, (fof.StringField, fof.BooleanField)):
is_list_field = isinstance(_field, fof.ListField)
if is_list_field:
_field = _field.field

if isinstance(
_field,
(fof.StringField, fof.BooleanField, fof.ObjectIdField),
):
field_type = "categorical"
elif isinstance(
_field,
(fof.FloatField, fof.IntField, fof.DateField, fof.DateTimeField),
):
field_type = "numeric"
elif is_list_field:
raise ValueError(
f"Cannot generate a summary for list field '{path}' with "
f"element type {type(_field)}"
)
elif _field is not None:
raise ValueError(
f"Cannot generate a summary for field '{path}' of "
Expand Down Expand Up @@ -1889,8 +1913,17 @@ def create_summary_field(
return field_name
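
With the list-field unwrapping above, `create_summary_field()` can now handle ObjectId fields and plain list fields whose elements are categorical or numeric. A usage sketch against the zoo quickstart dataset (the paths are illustrative of the newly supported types; adapt them to your schema):

```python
import fiftyone as fo
import fiftyone.zoo as foz

dataset = foz.load_zoo_dataset("quickstart")

# Plain list fields of primitives are now summarizable, e.g. sample tags
# (a ListField of StringField), which is treated as categorical
dataset.create_summary_field("tags")

# ObjectId fields are also treated as categorical, e.g. detection IDs
# (assumes this path resolves to an ObjectIdField in your schema)
dataset.create_summary_field("ground_truth.detections.id")

print(dataset.list_summary_fields())
```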

def _get_default_summary_field_name(self, path):
_path, is_frame_field, list_fields, _, _ = self._parse_field_name(path)
(
_path,
is_frame_field,
list_fields,
_,
id_to_str,
) = self._parse_field_name(path)

_chunks = _path.split(".")
if id_to_str:
_chunks = [c[1:] if c.startswith("_") else c for c in _chunks]

chunks = []
if is_frame_field:
@@ -1907,7 +1940,12 @@ def _get_default_summary_field_name(self, path):
if found_list:
chunks.append(_chunks[-1])

return "_".join(chunks)
field_name = "_".join(chunks)

if field_name == path:
field_name += "_summary"

return field_name
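
The newly captured `id_to_str` flag strips the internal leading underscore from ID paths (`_id` becomes `id`), and the `_summary` suffix avoids generating a field whose name collides with its source path. Expected defaults, illustrative rather than captured output:

```python
import fiftyone as fo
import fiftyone.zoo as foz

dataset = foz.load_zoo_dataset("quickstart")

# "ground_truth.detections.label" -> "ground_truth_label"
print(dataset._get_default_summary_field_name("ground_truth.detections.label"))

# "tags" -> "tags_summary" (the plain default would collide with the source path)
print(dataset._get_default_summary_field_name("tags"))
```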

def _populate_summary_field(self, field_name, summary_info):
path = summary_info["path"]
106 changes: 79 additions & 27 deletions fiftyone/operators/builtin.py
@@ -15,6 +15,8 @@
import fiftyone.operators as foo
import fiftyone.operators.types as types
from fiftyone.core.odm.workspace import default_workspace_factory

# pylint: disable=no-name-in-module
from fiftyone.operators.builtins.panels.model_evaluation import EvaluationPanel


@@ -66,8 +68,9 @@ def _edit_field_info_inputs(ctx, inputs):
}
)

path_keys = list(schema.keys())
path_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in path_keys:
path_selector.add_choice(key, label=key)

inputs.enum(
@@ -239,7 +242,7 @@ def _clone_sample_field_inputs(ctx, inputs):
schema = target_view.get_field_schema(flat=True)
full_schema = ctx.dataset.get_field_schema(flat=True)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
@@ -367,7 +370,7 @@ def _clone_frame_field_inputs(ctx, inputs):
schema = target_view.get_frame_field_schema(flat=True)
full_schema = ctx.dataset.get_frame_field_schema(flat=True)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
@@ -454,8 +457,9 @@ def _rename_sample_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
@@ -549,8 +553,9 @@ def _rename_frame_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
@@ -664,7 +669,7 @@ def _clear_sample_field_inputs(ctx, inputs):
schema.pop("id", None)
schema.pop("filepath", None)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
@@ -764,7 +769,7 @@ def _clear_frame_field_inputs(ctx, inputs):
schema.pop("id", None)
schema.pop("frame_number", None)

field_keys = sorted(schema.keys())
field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
@@ -907,8 +912,9 @@ def _delete_sample_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
@@ -976,8 +982,9 @@ def _delete_frame_field_inputs(ctx, inputs):
prop.invalid = True
return

field_keys = list(schema.keys())
field_selector = types.AutocompleteView()
for key in sorted(schema.keys()):
for key in field_keys:
field_selector.add_choice(key, label=key)

field_prop = inputs.enum(
@@ -1021,9 +1028,34 @@ def resolve_input(self, ctx):
}
)

categorical_field_types = (
fo.StringField,
fo.BooleanField,
fo.ObjectIdField,
)
numeric_field_types = (
fo.FloatField,
fo.IntField,
fo.DateField,
fo.DateTimeField,
)
valid_field_types = categorical_field_types + numeric_field_types

path_keys = [
p
for p, f in schema.items()
if (
isinstance(f, valid_field_types)
or (
isinstance(f, fo.ListField)
and isinstance(f.field, valid_field_types)
)
)
]

indexes = set(ctx.dataset.list_indexes())

field_keys = sorted(p for p in schema if p not in indexes)
field_keys = [p for p in path_keys if p not in indexes]
field_selector = types.AutocompleteView()
for key in field_keys:
field_selector.add_choice(key, label=key)
@@ -1051,7 +1083,7 @@ def execute(self, ctx):
field_name = ctx.params["field_name"]
unique = ctx.params.get("unique", False)

ctx.dataset.create_index(field_name, unique=unique)
ctx.dataset.create_index(field_name, unique=unique, wait=False)
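
With `wait=False`, the operator requests the index and returns immediately rather than blocking the App while MongoDB builds it. The same pattern in a script (dataset and field names are hypothetical):

```python
import fiftyone as fo

dataset = fo.load_dataset("my-dataset")  # hypothetical dataset name

# Request the index without waiting for the build to complete
dataset.create_index("ground_truth.detections.label", unique=False, wait=False)

# The index is registered immediately; the build proceeds in the background
print(dataset.list_indexes())
```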


class DropIndex(foo.Operator):
@@ -1071,7 +1103,8 @@ def resolve_input(self, ctx):
default_indexes = set(ctx.dataset._get_default_indexes())
if ctx.dataset._has_frame_fields():
default_indexes.update(
ctx.dataset._get_default_indexes(frames=True)
ctx.dataset._FRAMES_PREFIX + path
for path in ctx.dataset._get_default_indexes(frames=True)
)

indexes = [i for i in indexes if i not in default_indexes]
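
The prefixing matters because `list_indexes()` reports frame-level indexes under the `frames.` prefix, while `_get_default_indexes(frames=True)` returns bare names; without the prefix, default frame indexes would wrongly appear as droppable. A sketch of the intent (index names are illustrative):

```python
import fiftyone as fo

dataset = fo.load_dataset("my-video-dataset")  # hypothetical video dataset

# Illustrative: on a video dataset, list_indexes() might return
#   ["id", "filepath", "frames.id", ...]
# while _get_default_indexes(frames=True) returns bare names like ["id", ...]
default_indexes = set(dataset._get_default_indexes())
if dataset._has_frame_fields():
    default_indexes.update(
        dataset._FRAMES_PREFIX + path  # "frames." + "id" -> "frames.id"
        for path in dataset._get_default_indexes(frames=True)
    )

# Only non-default indexes are offered for dropping
droppable = [i for i in dataset.list_indexes() if i not in default_indexes]
```
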
@@ -1125,13 +1158,16 @@ def resolve_input(self, ctx):

def execute(self, ctx):
path = ctx.params["path"]
field_name = ctx.params.get("field_name", None)
_, field_name = _get_dynamic(ctx.params, "field_name", path, None)
sidebar_group = ctx.params.get("sidebar_group", None)
include_counts = ctx.params.get("include_counts", False)
group_by = ctx.params.get("group_by", None)
read_only = ctx.params.get("read_only", True)
create_index = ctx.params.get("create_index", True)

if not field_name:
field_name = None

if not sidebar_group:
sidebar_group = False

@@ -1148,6 +1184,12 @@ def execute(self, ctx):
ctx.trigger("reload_dataset")


def _get_dynamic(params, key, ref_path, default=None):
dynamic_key = key + "|" + ref_path.replace(".", "_")
value = params.get(dynamic_key, default)
return dynamic_key, value
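
A sketch of what the dynamic key scheme buys: because the `field_name` property key is scoped to the selected `path`, switching paths recomputes the default instead of carrying over a stale value (parameter values below are illustrative):

```python
def _get_dynamic(params, key, ref_path, default=None):
    dynamic_key = key + "|" + ref_path.replace(".", "_")
    value = params.get(dynamic_key, default)
    return dynamic_key, value

# Each selected path gets its own parameter slot
params = {"field_name|ground_truth_detections_label": "gt_label"}

print(_get_dynamic(params, "field_name", "ground_truth.detections.label"))
# ('field_name|ground_truth_detections_label', 'gt_label')

print(_get_dynamic(params, "field_name", "uniqueness"))
# ('field_name|uniqueness', None)  <- no stale value from the previous path
```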


def _create_summary_field_inputs(ctx, inputs):
schema = ctx.dataset.get_field_schema(flat=True)
if ctx.dataset._has_frame_fields():
@@ -1159,24 +1201,34 @@ def _create_summary_field_inputs(ctx, inputs):
}
)

categorical_field_types = (fo.StringField, fo.BooleanField)
categorical_field_types = (
fo.StringField,
fo.BooleanField,
fo.ObjectIdField,
)
numeric_field_types = (
fo.FloatField,
fo.IntField,
fo.DateField,
fo.DateTimeField,
)
valid_field_types = categorical_field_types + numeric_field_types

schema = {
p: f
field_keys = [
p
for p, f in schema.items()
if (
isinstance(f, categorical_field_types)
or isinstance(f, numeric_field_types)
isinstance(f, valid_field_types)
or (
isinstance(f, fo.ListField)
and isinstance(f.field, valid_field_types)
)
)
}
]

path_keys = list(schema.keys())
summarized_fields = set(ctx.dataset._get_summarized_fields_map())

path_keys = [p for p in field_keys if p not in summarized_fields]
path_selector = types.AutocompleteView()
for key in path_keys:
path_selector.add_choice(key, label=key)
@@ -1194,23 +1246,23 @@ def _create_summary_field_inputs(ctx, inputs):
if path is None or path not in path_keys:
return

field_name = ctx.params.get("field_name", None)
prop_name, field_name = _get_dynamic(ctx.params, "field_name", path, None)
if field_name is None:
default_field_name = ctx.dataset._get_default_summary_field_name(path)
else:
default_field_name = field_name

field_name_prop = inputs.str(
"field_name",
prop = inputs.str(
prop_name,
required=False,
label="Summary field",
description="The sample field in which to store the summary data",
default=default_field_name,
)

if field_name and field_name in path_keys:
field_name_prop.invalid = True
field_name_prop.error_message = f"Field '{field_name}' already exists"
if field_name and field_name in schema:
prop.invalid = True
prop.error_message = f"Field '{field_name}' already exists"
inputs.str(
"error",
label="Error",
@@ -1254,7 +1306,7 @@ def _create_summary_field_inputs(ctx, inputs):
)
elif isinstance(field, numeric_field_types):
group_prefix = path.rsplit(".", 1)[0] + "."
group_by_keys = sorted(p for p in schema if p.startswith(group_prefix))
group_by_keys = [p for p in field_keys if p.startswith(group_prefix)]
group_by_selector = types.AutocompleteView()
for group in group_by_keys:
group_by_selector.add_choice(group, label=group)
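
For numeric paths, the group-by candidates are the sibling paths under the same parent, now drawn from the filtered `field_keys`. A standalone illustration of the prefix logic (schema paths are hypothetical):

```python
# Hypothetical flattened schema paths for a detections field
field_keys = [
    "predictions.detections.label",
    "predictions.detections.confidence",
    "predictions.detections.index",
    "ground_truth.detections.label",
]

path = "predictions.detections.confidence"
group_prefix = path.rsplit(".", 1)[0] + "."  # "predictions.detections."

group_by_keys = [p for p in field_keys if p.startswith(group_prefix)]
# ['predictions.detections.label', 'predictions.detections.confidence',
#  'predictions.detections.index']
```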
