Skip to content

Commit

Permalink
feat(protobuf): adding deprecation support for datasets and fields (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
leifker authored May 4, 2022
1 parent 23f657e commit c22d52d
Show file tree
Hide file tree
Showing 24 changed files with 403 additions and 100 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ import "google/protobuf/descriptor.proto";
*/
enum DataHubMetadataType {
PROPERTY = 0; // Datahub Custom Property
TAG = 1; // Datahub Tag
TAG_LIST = 2; // comma delimited string
TERM = 3; // Datahub Term
OWNER = 4; // Datahub Owner
DOMAIN = 5; // Datahub Domain
PROPERTY = 0; // Datahub Custom Property
TAG = 1; // Datahub Tag
TAG_LIST = 2; // comma delimited string
TERM = 3; // Datahub Term
OWNER = 4; // Datahub Owner
DOMAIN = 5; // Datahub Domain
DEPRECATION = 6; // Datahub Deprecation
}

// Assuming Glossary Term defined from bootstrap example
Expand Down Expand Up @@ -96,6 +97,9 @@ message lifecycle {
bool archived = 5500 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];
Frequency frequency = 5510 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];
string ttl = 5520 [(datahubField.type) = TAG];

repeated string deprecation_note = 5530 [(datahubField.type) = DEPRECATION];
uint64 deprecation_time = 5531 [(datahubField.type) = DEPRECATION];
}
}

Expand All @@ -110,6 +114,9 @@ message message {

string tags = 5600 [(datahubField.type) = TAG_LIST];
MessageType type = 5610 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];

repeated string deprecation_note = 5620 [(datahubField.type) = DEPRECATION, (datahubField.type) = PROPERTY];
uint64 deprecation_time = 5621 [(datahubField.type) = DEPRECATION, (datahubField.type) = PROPERTY];
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ import "google/protobuf/timestamp.proto";
Clickstream impressions
**/
message Impression {
option deprecated = true;
option(meta.lifecycle.deprecation_note) = "Impression events are deprecated.";
option(meta.lifecycle.deprecation_time) = 1649693315;

option(meta.message.type) = EVENT;
option(meta.kafka.topics) = "clickstream_impressions";

Expand Down Expand Up @@ -36,5 +40,7 @@ message Impression {
(meta.fieldTags.tag_list) = "a, b, c",
(meta.securityField.classification_enum) = HighlyConfidential,
(meta.securityField.classification) = "Classification.Sensitive"
]; // event details
]; // event details

string deprecated_field = 3 [deprecated = true];
}
101 changes: 93 additions & 8 deletions metadata-integration/java/datahub-protobuf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ enum DataHubMetadataType {
TERM = 3; // Datahub Term
OWNER = 4; // Datahub Owner
DOMAIN = 5; // Datahub Domain
DEPRECATION = 6; // Datahub Deprecation
}
/*
Expand Down Expand Up @@ -210,20 +211,24 @@ message msg {
meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
bool bool_feature = 60005 [(meta.fld.type) = TAG];
string alert_channel = 60007 [(meta.fld.type) = PROPERTY];
repeated string deprecation_note = 60008 [(meta.fld.type) = DEPRECATION, (meta.fld.type) = PROPERTY];
uint64 deprecation_time = 60009 [(meta.fld.type) = DEPRECATION, (meta.fld.type) = PROPERTY];
}
}
```

#### DataHubMetadataType

| DataHubMetadataType | String | Bool | Enum | Repeated |
|---------------------|--------|------|------|----------|
| PROPERTY | X | X | X | X |
| TAG | X | X | X | |
| TAG_LIST | X | | | |
| TERM | X | | X | |
| OWNER | X | | | X |
| DOMAIN | X | | | X |
| DataHubMetadataType | String | Bool | Enum | Repeated | Uint64 |
|---------------------|-----------|------|------|-----------|----------|
| PROPERTY | X | X | X | X | |
| TAG | X | X | X | | |
| TAG_LIST | X | | | | |
| TERM | X | | X | | |
| OWNER | X | | | X | |
| DOMAIN | X | | | X | |
| DEPRECATION | X (notes) | | | X (notes) | X (time) |

##### PROPERTY

Expand Down Expand Up @@ -361,6 +366,33 @@ The dot delimited prefix also works with enum types where the prefix is the enum
}
```

In addition, tags can be added to fields as well as messages. The following is a consolidated example for all the possible tag options on fields.

```protobuf
enum MetaEnumExample {
UNKNOWN = 0;
ENTITY = 1;
EVENT = 2;
}
message fld {
extend google.protobuf.FieldOptions {
string tags = 6000 [(meta.fld.type) = TAG_LIST];
string tagString = 6001 [(meta.fld.type) = TAG];
bool tagBool = 6002 [(meta.fld.type) = TAG];
MetaEnumExample tagEnum = 6003 [(meta.fld.type) = TAG];
}
}
message Message {
uint32 my_field = 1
[(meta.fld.tags) = "a, b, c",
(meta.fld.tagString) = "myTag",
(meta.fld.tagBool) = true,
(meta.fld.tagEnum) = ENTITY];
}
```

##### TERM

Terms are specified by either a fully qualified string value or an enum where the enum type's name is the first element in the fully qualified term name.
Expand All @@ -387,6 +419,29 @@ The following example shows both methods, either of which would result in the te
}
```

The following is a consolidated example for the possible field level term options.

```protobuf
enum Classification {
HighlyConfidential = 0;
Confidential = 1;
Sensitive = 2;
}
message fld {
extend google.protobuf.FieldOptions {
Classification term = 5000 [(meta.fld.type) = TERM];
string class = 5001 [(meta.fld.type) = TERM];
}
}
message Message {
uint32 my_field = 1
[(meta.fld.term) = HighlyConfidential,
(meta.fld.class) = "Classification.HighlyConfidential"];
}
```

##### OWNER

One or more owners can be specified and can be any combination of `corpUser` and `corpGroup` entities. The default entity type is `corpGroup`. By default, the ownership type is set to `producer`, see the second example for setting the ownership type.
Expand Down Expand Up @@ -438,6 +493,36 @@ Set the domain id for the dataset. The domain should exist already. Note that th
}
```

##### DEPRECATION

Deprecation of fields and messages is natively supported by protobuf options.
The standard "Deprecation" aspect is used for a dataset generated from a protobuf `message`.
Field deprecation adds a tag with the following urn `urn:li:tag:deprecated` (red, #FF0000).

```protobuf
message msg {
extend google.protobuf.MessageOptions {
repeated string deprecation_note = 5620 [(meta.fld.type) = DEPRECATION];
uint64 deprecation_time = 5621 [(meta.fld.type) = DEPRECATION];
}
}
message Message {
option deprecated = true;
option (meta.msg.deprecation_note) = "Deprecated for this other message.";
option (meta.msg.deprecation_note) = "Drop in replacement.";
option (meta.msg.deprecation_time) = 1649689387;
}
```

The field deprecation tag works without definition in `meta.proto` using the native protobuf option.

```protobuf
message Message {
uint32 my_field = 1 [deprecated = true];
}
```

## Gradle Integration

An example application is included which works with the `protobuf-gradle-plugin`, see the standalone [example project](../datahub-protobuf-example).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
import datahub.protobuf.visitors.dataset.InstitutionalMemoryVisitor;
import datahub.protobuf.visitors.dataset.KafkaTopicPropertyVisitor;
import datahub.protobuf.visitors.dataset.OwnershipVisitor;
import datahub.protobuf.visitors.dataset.ProtobufExtensionPropertyVisitor;
import datahub.protobuf.visitors.dataset.ProtobufExtensionTagAssocVisitor;
import datahub.protobuf.visitors.dataset.ProtobufExtensionTermAssocVisitor;
import datahub.protobuf.visitors.dataset.PropertyVisitor;
import datahub.protobuf.visitors.dataset.TagAssociationVisitor;
import datahub.protobuf.visitors.dataset.TermAssociationVisitor;
import datahub.protobuf.visitors.field.SchemaFieldVisitor;
import datahub.event.MetadataChangeProposalWrapper;
import datahub.protobuf.visitors.field.ProtobufExtensionFieldVisitor;
import datahub.protobuf.visitors.tags.ProtobufExtensionTagVisitor;
import datahub.protobuf.visitors.tags.TagVisitor;

import javax.annotation.Nullable;
import java.io.IOException;
Expand Down Expand Up @@ -122,7 +122,7 @@ public ProtobufDataset build() throws IOException {
new ProtobufGraph(fileSet, messageName, filename), schema, auditStamp, fabricType)
.setMetadataChangeProposalVisitors(
List.of(
new ProtobufExtensionTagVisitor()
new TagVisitor()
)
)
.setFieldVisitor(new ProtobufExtensionFieldVisitor())
Expand All @@ -131,7 +131,7 @@ public ProtobufDataset build() throws IOException {
.datasetPropertyVisitors(
List.of(
new KafkaTopicPropertyVisitor(),
new ProtobufExtensionPropertyVisitor()
new PropertyVisitor()
)
)
.institutionalMemoryMetadataVisitors(
Expand All @@ -141,12 +141,12 @@ public ProtobufDataset build() throws IOException {
)
.tagAssociationVisitors(
List.of(
new ProtobufExtensionTagAssocVisitor()
new TagAssociationVisitor()
)
)
.termAssociationVisitors(
List.of(
new ProtobufExtensionTermAssocVisitor()
new TermAssociationVisitor()
)
)
.ownershipVisitors(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
import com.google.protobuf.DescriptorProtos;
import com.google.protobuf.Descriptors;
import com.google.protobuf.ExtensionRegistry;
import com.linkedin.util.Pair;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
Expand Down Expand Up @@ -35,6 +40,93 @@ public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Lo
return new String(orig.getBytes(StandardCharsets.ISO_8859_1));
}

/*
* Reflection used to prevent an exception deep inside the protobuf library due to a getter method
* mutating the json name field and causing an equality check to fail between an instance that has and has not
* had the getter called.
*
* https://github.com/protocolbuffers/protobuf/blob/main/java/core/src/main/java/com/google/protobuf/Descriptors.java#L1105
*
* java.lang.IllegalArgumentException: FieldDescriptors can only be compared to other FieldDescriptors for fields of the same message type.
* at com.google.protobuf.Descriptors$FieldDescriptor.compareTo(Descriptors.java:1344)
* at com.google.protobuf.Descriptors$FieldDescriptor.compareTo(Descriptors.java:1057)
* at java.base/java.util.TreeMap.put(TreeMap.java:566)
* at java.base/java.util.AbstractMap.putAll(AbstractMap.java:281)
* at java.base/java.util.TreeMap.putAll(TreeMap.java:325)
* at com.google.protobuf.GeneratedMessageV3$ExtendableMessage.getAllFields(GeneratedMessageV3.java:1240)
*
*/
// Reflective handle for "getExtensionFields" declared on the direct superclass of FieldOptions.
private static final Method FIELD_OPT_EXT_FIELDS_METHOD;
// Reflective handle for "getAllFieldsMutable(boolean)" declared two levels above FieldOptions.
private static final Method FIELD_OPT_ALL_FIELD_METHOD;
// Reflective handle for "getExtensionFields" declared on the direct superclass of MessageOptions.
private static final Method MSG_OPT_EXT_FIELDS_METHOD;
// Reflective handle for "getAllFieldsMutable(boolean)" declared two levels above MessageOptions.
private static final Method MSG_OPT_ALL_FIELD_METHOD;

// Resolve the four handles once at class-load time and make them callable;
// a missing method aborts class initialization via a RuntimeException.
static {
    final Class<?> fieldOptionsParent = DescriptorProtos.FieldOptions.class.getSuperclass();
    final Class<?> messageOptionsParent = DescriptorProtos.MessageOptions.class.getSuperclass();
    try {
        final Method fieldExtensions = fieldOptionsParent.getDeclaredMethod("getExtensionFields");
        fieldExtensions.setAccessible(true);
        FIELD_OPT_EXT_FIELDS_METHOD = fieldExtensions;

        final Method fieldAll = fieldOptionsParent.getSuperclass()
                .getDeclaredMethod("getAllFieldsMutable", boolean.class);
        fieldAll.setAccessible(true);
        FIELD_OPT_ALL_FIELD_METHOD = fieldAll;

        final Method messageExtensions = messageOptionsParent.getDeclaredMethod("getExtensionFields");
        messageExtensions.setAccessible(true);
        MSG_OPT_EXT_FIELDS_METHOD = messageExtensions;

        final Method messageAll = messageOptionsParent.getSuperclass()
                .getDeclaredMethod("getAllFieldsMutable", boolean.class);
        messageAll.setAccessible(true);
        MSG_OPT_ALL_FIELD_METHOD = messageAll;
    } catch (NoSuchMethodException missing) {
        throw new RuntimeException(missing);
    }
}

/**
 * Lists the options attached to a field as (descriptor, value) pairs.
 *
 * The options object is read through the reflective handles
 * FIELD_OPT_EXT_FIELDS_METHOD and FIELD_OPT_ALL_FIELD_METHOD (the latter
 * invoked with {@code false}); entries from the first map precede entries
 * from the second in the returned list.
 *
 * @param fieldProto field descriptor proto whose options are inspected
 * @return the collected option pairs
 * @throws RuntimeException wrapping any failure of the reflective calls
 */
public static List<Pair<Descriptors.FieldDescriptor, Object>> getFieldOptions(DescriptorProtos.FieldDescriptorProto fieldProto) {
    final LinkedList<Pair<Descriptors.FieldDescriptor, Object>> collected = new LinkedList<>();
    try {
        final Map<Descriptors.FieldDescriptor, Object> extensionValues =
                (Map<Descriptors.FieldDescriptor, Object>) FIELD_OPT_EXT_FIELDS_METHOD.invoke(fieldProto.getOptions());
        for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : extensionValues.entrySet()) {
            collected.add(Pair.of(entry.getKey(), entry.getValue()));
        }

        final Map<Descriptors.FieldDescriptor, Object> declaredValues =
                (Map<Descriptors.FieldDescriptor, Object>) FIELD_OPT_ALL_FIELD_METHOD.invoke(fieldProto.getOptions(), false);
        for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : declaredValues.entrySet()) {
            collected.add(Pair.of(entry.getKey(), entry.getValue()));
        }
    } catch (IllegalAccessException | InvocationTargetException reflectionFailure) {
        throw new RuntimeException(reflectionFailure);
    }
    return collected;
}

/**
 * Lists the options attached to a message as (descriptor, value) pairs.
 *
 * The options object is read through the reflective handles
 * MSG_OPT_EXT_FIELDS_METHOD and MSG_OPT_ALL_FIELD_METHOD (the latter
 * invoked with {@code false}); entries from the first map precede entries
 * from the second in the returned list.
 *
 * @param messageProto message descriptor proto whose options are inspected
 * @return the collected option pairs
 * @throws RuntimeException wrapping any failure of the reflective calls
 */
public static List<Pair<Descriptors.FieldDescriptor, Object>> getMessageOptions(DescriptorProtos.DescriptorProto messageProto) {
    final LinkedList<Pair<Descriptors.FieldDescriptor, Object>> collected = new LinkedList<>();
    try {
        final Map<Descriptors.FieldDescriptor, Object> extensionValues =
                (Map<Descriptors.FieldDescriptor, Object>) MSG_OPT_EXT_FIELDS_METHOD.invoke(messageProto.getOptions());
        for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : extensionValues.entrySet()) {
            collected.add(Pair.of(entry.getKey(), entry.getValue()));
        }

        final Map<Descriptors.FieldDescriptor, Object> declaredValues =
                (Map<Descriptors.FieldDescriptor, Object>) MSG_OPT_ALL_FIELD_METHOD.invoke(messageProto.getOptions(), false);
        for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : declaredValues.entrySet()) {
            collected.add(Pair.of(entry.getKey(), entry.getValue()));
        }
    } catch (IllegalAccessException | InvocationTargetException reflectionFailure) {
        throw new RuntimeException(reflectionFailure);
    }
    return collected;
}

public static ExtensionRegistry buildRegistry(DescriptorProtos.FileDescriptorSet fileSet) {
ExtensionRegistry registry = ExtensionRegistry.newInstance();
Map<String, DescriptorProtos.FileDescriptorProto> descriptorProtoMap = fileSet.getFileList().stream()
Expand Down Expand Up @@ -94,7 +186,9 @@ private static Descriptors.FileDescriptor descriptorFromProto(

// Finally, construct the actual descriptor.
Descriptors.FileDescriptor[] empty = new Descriptors.FileDescriptor[0];
return Descriptors.FileDescriptor.buildFrom(descriptorProto, dependencies.build().toArray(empty), false);
Descriptors.FileDescriptor descript = Descriptors.FileDescriptor.buildFrom(descriptorProto, dependencies.build().toArray(empty), false);
descriptorCache.put(descript.getName(), descript);
return descript;
}

}
Expand Down
Loading

0 comments on commit c22d52d

Please sign in to comment.