-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathexample.yaml
287 lines (286 loc) · 9.5 KB
/
example.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
---
# Data product descriptor: identity, ownership, support and business
# metadata, followed by the list of components that make up the product.
id: urn:dmb:dp:my_domain:my_data_product:1
name: my data product
fullyQualifiedName: My Data Product
description: this data product is representing the xxx functional entity
kind: dataproduct
domain: my_domain
version: 1.0.0
environment: development
dataProductOwner: tom_smith_corp.com
dataProductOwnerDisplayName: Tom Smith
# NOTE(review): the address is a redaction placeholder in this copy of the
# file; replace it with the real owner address.
email: 'mailto:[email protected]'
# Each key appears exactly once: repeated mapping keys are invalid YAML and
# most parsers silently keep only the last occurrence.
ownerGroup: dataproduct1_corp.com
devGroup: dataproduct1_dev_corp.com
supportSLA:
  supportHours: 8x5
  responseTime: 1H
  # NOTE(review): kept as the literal string "undefined"; confirm whether a
  # real duration or null is intended.
  resolutionTime: undefined
  informationTime: 2WD
  endOfSupport: 01/01/2026
status: DRAFT
maturity: Strategic
billing: {}
businessInfo:
  valueProposition: Unlock some capability for the organization
  valueGeneration: OperationMonitoring
  okr: increase the margin
  pricingType: Subscription
  stakeholderRoles:
  - CMO
securityInfo:
  visibility: Department
  confidentiality: Confidential
  # Canonical boolean; a bare Yes is a boolean for YAML 1.1 parsers but a
  # string for YAML 1.2 ones.
  gdpr: true
contacts:
  # Quoted because a value starting with "[" would otherwise be read as a
  # flow sequence. NOTE(review): both addresses are redaction placeholders.
  ownerContact: '[email protected]'
  # NOTE(review): key is spelled "suportContact"; kept as-is because
  # consumers may look it up by this exact name - confirm before renaming.
  suportContact: '[email protected]'
targetConsumption:
- Analytics
- Reporting
- OnlineApplication
tags: []
businessConcepts:
- tagFQN: Margin
  source: Glossary
  labelType: Manual
  state: Confirmed
specific: {}
components:
# Output port of type Files on the s3_cdp technology (raw S3 data).
- id: urn:dmb:cmp:my_domain:my_data_product:1:my_raw_s3_port
  name: my raw s3 port
  fullyQualifiedName: My Raw S3 Port
  description: s3 raw output port
  kind: outputport
  version: 1.0.1
  infrastructureTemplateId: microservice-id-1
  useCaseTemplateId: template-id-1
  dependsOn: []
  platform: CDP on AWS
  technology: s3_cdp
  outputPortType: Files
  # NOTE(review): not an ISO 8601 timestamp (day/month order is ambiguous);
  # confirm the format the consumer expects.
  creationDate: 05-04-2022T16:53:00.000Z
  # Explicit null instead of a bare key, so the missing value is deliberate.
  startDate: null
  processDescription: >-
    this output port is generated by a Spark Job scheduled every day at 2AM
    and it lasts for approx 2 hours
  dataContract:
    schema: []
    SLA:
      intervalOfChange: 1 hours
      timeliness: 1 minutes
      upTime: 99.9%
    # termsAndConditions sits in dataContract, the same place the impala
    # output port of this product declares it.
    termsAndConditions: only usable in development environment
    endpoint: https://myurl/development/my_domain/my_data_product/1.0.0/my_raw_s3_port
  dataSharingAgreements:
    purpose: this output port want to provide a rich set of profitability KPIs related to the customer
    billing: 5$ for each full scan
    security: In order to consume this output port an additional security check with compliance must be done
    intendedUsage: >-
      the dataset is huge so it is recommended to extract maximum 1 year of
      data and to use these KPIs in the marketing or sales domain, but not
      for customer care
    limitations: is not possible to use this data without a compliance check
    lifeCycle: the maximum retention is 10 years, and eviction is happening on the first of january
    confidentiality: if you want to store this data somewhere else, PII columns must be masked
  tags:
  - tagFQN: experimental
    source: Tag
    labelType: Manual
    state: Confirmed
  - tagFQN: structured
    source: Tag
    labelType: Manual
    state: Confirmed
  sampleData: {}
  sampleQuery: select * from dp.table
  semanticLinking: {}
  # Technology-specific settings for this port.
  specific:
    directory: history
    bucket: ms-datamesh-s3
# Output port of type SQL on the impala_cdp technology; depends on the raw
# S3 output port of the same data product.
- id: urn:dmb:cmp:my_domain:my_data_product:1:my_view_impala_port
  name: my view impala port
  fullyQualifiedName: My View Impala Port
  description: impala view output port
  # Lower-case, identical to the kind used by the other output port.
  kind: outputport
  version: 1.1.0
  infrastructureTemplateId: microservice-id-2
  useCaseTemplateId: template-id-2
  # Quoted: a plain scalar full of colons inside a flow sequence is rejected
  # by some parsers.
  dependsOn:
  - 'urn:dmb:cmp:my_domain:my_data_product:1:my_raw_s3_port'
  platform: CDP on AWS
  technology: impala_cdp
  outputPortType: SQL
  # NOTE(review): not an ISO 8601 timestamp (day/month order is ambiguous);
  # confirm the format the consumer expects.
  creationDate: 05-04-2022T17:00:00.000Z
  # Explicit nulls instead of bare keys, so the missing values are deliberate.
  startDate: null
  retentionTime: null
  processDescription: null
  dataContract:
    # One entry per column exposed by the port.
    schema:
    - name: employeeId
      dataType: string
      description: global addressable identifier for an employee.
      constraint: PRIMARY_KEY
      tags:
      - tagFQN: GlobalAddressableIdentifier
        source: Tag
        labelType: Manual
        state: Confirmed
    - name: first_name
      dataType: string
      description: employee's first name
      constraint: NOT_NULL
      tags:
      - tagFQN: PII
        source: Tag
        labelType: Manual
        state: Confirmed
    - name: last_name
      dataType: string
      description: employee's last name
      constraint: NOT_NULL
      tags:
      - tagFQN: PII
        source: Tag
        labelType: Manual
        state: Confirmed
    - name: birthdate
      dataType: date
      description: employee's birthdate
      constraint: NOT_NULL
      tags: []
    - name: gender
      dataType: string
      description: employee's gender
      constraint: NOT_NULL
      tags: []
    - name: residential_address
      dataType: struct
      description: employee's residential address
      constraint: NOT_NULL
      tags:
      - tagFQN: PII
        source: Tag
        labelType: Manual
        state: Confirmed
      businessTerms:
      - tagFQN: BusinessAddress
        source: Glossary
        labelType: Manual
        state: Confirmed
    - name: first_hire_date
      dataType: date
      description: the date of his/her first hire in mybank. No matter is a temporary or permanent contract
      constraint: NOT_NULL
      tags: []
    - name: last_working_date
      dataType: date
      description: the last day the employee worked for mybank
      # Quoted: a bare NULL is parsed as the YAML null value, not as the
      # constraint name.
      constraint: 'NULL'
      tags: []
    - name: last_update
      dataType: date
      description: the last date the record has been updated
      constraint: 'NULL'
      tags: []
    - name: businessTs
      dataType: timestamp
      description: the business timestamp, to be leveraged for time-travelling
      constraint: NOT_NULL
      tags: []
    - name: writeTs
      dataType: timestamp
      description: the technical (write) timestamp, to be leveraged for time-travelling
      constraint: NOT_NULL
      tags: []
    SLA:
      intervalOfChange: 1 hours
      timeliness: 1 minutes
      upTime: 99.9%
    termsAndConditions: only usable in development environment
    # NOTE(review): this URL ends in my_raw_s3_port, the name of the other
    # output port - confirm it is the intended endpoint for this port.
    endpoint: https://myurl/development/my_domain/my_data_product/1.0.0/my_raw_s3_port
    # These two values name the businessTs and writeTs columns of the schema.
    biTempBusinessTs: businessTs
    biTempWriteTs: writeTs
  dataSharingAgreements:
    purpose: this output port want to provide a rich set of profitability KPIs related to the customer
    billing: 5$ for each full scan
    security: In order to consume this output port an additional security check with compliance must be done
    intendedUsage: >-
      the dataset is huge so it is recommended to extract maximum 1 year of
      data and to use these KPIs in the marketing or sales domain, but not
      for customer care
    limitations: is not possible to use this data without a compliance check
    lifeCycle: the maximum retention is 10 years, and eviction is happening on the first of january
    confidentiality: if you want to store this data somewhere else, PII columns must be masked
  tags: []
  sampleData:
    columns:
    - name
    - surname
    # Each row is a list of values in the order of the columns above.
    rows:
    - - Jace
      - Beleren
    - - Gideon
      - Jura
    - - Chandra
      - Nalaar
  semanticLinking: {}
  # Technology-specific settings for this port.
  specific:
    database: my_database
    table: my_table
    location: /my_path
    schema:
      firstName: string
      lastName: string
    format: PARQUET
# Batch workload on the spark technology; reads from an external source
# identified by URN.
- id: urn:dmb:cmp:my_domain:my_data_product:1:my_spark_workload
  name: my spark workload
  fullyQualifiedName: My Spark workload
  description: spark batch workload
  kind: workload
  version: 1.1.1
  infrastructureTemplateId: microservice-id-3
  useCaseTemplateId: template-id-3
  platform: CDP on AWS
  technology: spark
  workloadType: batch
  connectionType: DataPipeline
  tags: []
  # Quoted: a plain scalar full of colons inside a flow sequence is rejected
  # by some parsers.
  readsFrom:
  - 'urn:dmb:ex:mainframe_db2_database'
  # Technology-specific settings: artifact location, entry class, arguments,
  # driver/executor sizing and schedule.
  specific:
    artifactory: ms-datamesh-s3
    artefact: /path/to/my/spark/workload.jar
    service: my_cdp_service
    cluster: my_cde_cluster
    className: com.mycompany.MySparkApp
    args:
    - arg1
    - arg2
    driverCores: 1
    driverMemory: 4g
    executorCores: 4
    executorMemory: 4g
    numExecutors: 3
    schedule:
      # Quoted because the expression contains the YAML indicator
      # characters "?" and "*".
      cronExpression: '0 0 0,22 ? * * *'
# Observability component: one endpoint plus two handler definitions under
# specific.obsEndpoint.
- id: urn:dmb:cmp:my_domain:my_data_product:1:my_observability
  name: my observability
  fullyQualifiedName: My Observability
  description: observability for my data product
  kind: observability
  infrastructureTemplateId: microservice-id-4
  useCaseTemplateId: template-id-4
  version: 1.1.1
  endpoint: http://develop/my_domain/my_data_product/1.0.0/obs
  # No value is provided for these keys in this example; explicit null
  # instead of a bare key makes that deliberate.
  completeness: null
  dataProfiling: null
  freshness: null
  availability: null
  dataQuality: null
  specific:
    restApiName: obs_api
    stageName: data_mesh
    bucket: ms-datamesh-s3
    # One entry per exposed resource path (/data_quality and /workload).
    obsEndpoint:
    - artifact: path/to/my/obs_dq.jar
      handler: com.mycompany.MyHandler::handleRequest
      lambdaname: my_data_product_obs_dq
      awsResourceName: my_data_product_obs_dq
      awsResourcePath: /data_quality
    - artifact: path/to/my/obs_workload.jar
      handler: com.mycompany.MyHandler::handleRequest
      lambdaname: my_data_product_obs_workload
      awsResourceName: my_data_product_obs_workload
      awsResourcePath: /workload