diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index 5e8413bbb6f30..c9b0e4d7de467 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -140,6 +140,10 @@ class OktaConfig(StatefulIngestionConfigBase, ConfigModel): default=None, description="Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.", ) + skip_users_without_a_group: bool = Field( + default=False, + description="Whether to only ingest users that are members of groups. If this is set to False, all users will be ingested regardless of group membership.", + ) # Configuration for stateful ingestion stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( @@ -387,6 +391,15 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: datahub_corp_user_snapshot.urn ] ) + if ( + self.config.skip_users_without_a_group + and len(datahub_group_membership.groups) == 0 + ): + logger.debug( + f"Filtering {datahub_corp_user_snapshot.urn} due to group filter" + ) + self.report.report_filtered(datahub_corp_user_snapshot.urn) + continue assert datahub_group_membership is not None datahub_corp_user_snapshot.aspects.append(datahub_group_membership) mce = MetadataChangeEvent(proposedSnapshot=datahub_corp_user_snapshot)