Commit

Update application.conf files

pflooky committed Jul 9, 2024
1 parent 791fbdb commit c26be6a
Showing 2 changed files with 85 additions and 67 deletions.
139 changes: 76 additions & 63 deletions app/src/main/resources/application.conf
@@ -1,97 +1,129 @@
flags {
enableGeneratePlanAndTasks = false
enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS}
enableCount = true
enableCount = ${?ENABLE_COUNT}
enableGenerateData = true
enableGenerateData = ${?ENABLE_GENERATE_DATA}
enableGeneratePlanAndTasks = false
enableGeneratePlanAndTasks = ${?ENABLE_GENERATE_PLAN_AND_TASKS}
enableRecordTracking = false
enableRecordTracking = ${?ENABLE_RECORD_TRACKING}
enableDeleteGeneratedRecords = false
enableDeleteGeneratedRecords = ${?ENABLE_DELETE_GENERATED_RECORDS}
enableFailOnError = true
enableFailOnError = ${?ENABLE_FAIL_ON_ERROR}
enableUniqueCheck = true
enableUniqueCheck = ${?ENABLE_UNIQUE_CHECK}
enableSinkMetadata = true
enableSinkMetadata = ${?ENABLED_SINK_METADATA}
enableSinkMetadata = ${?ENABLE_SINK_METADATA}
enableSaveReports = true
enableSaveReports = ${?ENABLED_SAVE_REPORTS}
enableSaveReports = ${?ENABLE_SAVE_REPORTS}
enableValidation = false
enableValidation = ${?ENABLED_VALIDATION}
enableValidation = ${?ENABLE_VALIDATION}
enableGenerateValidations = false
enableGenerateValidations = ${?ENABLE_GENERATE_VALIDATIONS}
enableAlerts = false
enableAlerts = ${?ENABLE_ALERTS}
}
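
Note: the repeated key pairs in this file use HOCON's optional substitution. The first assignment is the default and the second, ${?SOME_ENV_VAR}, only overrides it when that environment variable is set. A minimal sketch of the pattern, reusing a flag from the block above:

flags {
  # default used when ENABLE_COUNT is not set in the environment
  enableCount = true
  # optional substitution: overrides the default only if ENABLE_COUNT is defined
  enableCount = ${?ENABLE_COUNT}
}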

folders {
generatedPlanAndTaskFolderPath = "/tmp"
generatedPlanAndTaskFolderPath = ${?GENERATED_PLAN_AND_TASK_FOLDER_PATH}
planFilePath = "/plan/customer-create-plan.yaml"
planFilePath = "app/src/test/resources/sample/plan/customer-create-plan.yaml"
planFilePath = ${?PLAN_FILE_PATH}
taskFolderPath = "/task"
taskFolderPath = "app/src/test/resources/sample/task"
taskFolderPath = ${?TASK_FOLDER_PATH}
recordTrackingFolderPath = "/tmp/data/generated/recordTracking"
recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH}
recordTrackingForValidationFolderPath = "/tmp/data/validation/recordTracking"
recordTrackingForValidationFolderPath = ${?RECORD_TRACKING_VALIDATION_FOLDER_PATH}
generatedReportsFolderPath = "app/src/test/resources/sample/html"
generatedReportsFolderPath = ${?GENERATED_REPORTS_FOLDER_PATH}
validationFolderPath = "app/src/test/resources/sample/validation"
validationFolderPath = ${?VALIDATION_FOLDER_PATH}
}
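
Note: the same override mechanism lets a deployment point the plan, task and report folders at its own locations. A sketch with hypothetical paths:

folders {
  planFilePath = "/opt/data-caterer/plan/my-plan.yaml"      # hypothetical path
  planFilePath = ${?PLAN_FILE_PATH}
  taskFolderPath = "/opt/data-caterer/task"                 # hypothetical path
  taskFolderPath = ${?TASK_FOLDER_PATH}
  generatedReportsFolderPath = "/opt/data-caterer/report"   # hypothetical path
  generatedReportsFolderPath = ${?GENERATED_REPORTS_FOLDER_PATH}
}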

metadata {
numRecordsFromDataSource = 10000
numRecordsFromDataSource = ${?METADATA_NUM_RECORDS_FROM_DATA_SOURCE}
numRecordsFromDataSource = ${?NUM_RECORDS_FROM_DATA_SOURCE}
numRecordsForAnalysis = 10000
numRecordsForAnalysis = ${?METADATA_NUM_RECORDS_FOR_ANALYSIS}
numRecordsForAnalysis = ${?NUM_RECORDS_FOR_ANALYSIS}
oneOfDistinctCountVsCountThreshold = 0.1
oneOfDistinctCountVsCountThreshold = ${?METADATA_ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD}
oneOfDistinctCountVsCountThreshold = ${?ONE_OF_DISTINCT_COUNT_VS_COUNT_THRESHOLD}
oneOfMinCount = 1000
oneOfMinCount = ${?ONE_OF_MIN_COUNT}
numGeneratedSamples = 10
numGeneratedSamples = ${?NUM_GENERATED_SAMPLES}
}

generation {
numRecordsPerBatch = 100000
numRecordsPerBatch = ${?GENERATION_NUM_RECORDS_PER_BATCH}
numRecordsPerBatch = 1000000
numRecordsPerBatch = ${?NUM_RECORDS_PER_BATCH}
}

validation {}
alert {}
validation {
numSampleErrorRecords = 5
numSampleErrorRecords = ${?NUM_SAMPLE_ERROR_RECORDS}
enableDeleteRecordTrackingFiles = true
enableDeleteRecordTrackingFiles = ${?ENABLE_DELETE_RECORD_TRACKING_FILES}
}

alert {
triggerOn = "all"
triggerOn = ${?ALERT_TRIGGER_ON}
slackAlertConfig {
token = ""
token = ${?ALERT_SLACK_TOKEN}
channels = []
channels = ${?ALERT_SLACK_CHANNELS}
}
}
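
Note: a hedged sketch of a populated alert block, assuming triggerOn also accepts a failure-only value; the Slack channel name is hypothetical and the token is supplied via the environment variable shown above:

alert {
  triggerOn = "failure"                   # assumption: a failure-only trigger value is supported
  slackAlertConfig {
    token = ${?ALERT_SLACK_TOKEN}         # keep the secret out of the file
    channels = ["#data-caterer-alerts"]   # hypothetical channel
  }
}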

runtime {
master = "local[*]"
master = ${?DATA_CATERER_MASTER}
config {
"spark.sql.cbo.enabled" = "true"
"spark.sql.adaptive.enabled" = "true"
"spark.sql.cbo.planStats.enabled" = "true"
"spark.sql.legacy.allowUntypedScalaUDF" = "true"
"spark.sql.statistics.histogram.enabled" = "true"
"spark.sql.shuffle.partitions" = "10"
"spark.sql.catalog.postgres" = ""
"spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog"
"spark.hadoop.fs.s3a.directory.marker.retention" = "keep"
"spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true"
#"spark.hadoop.fs.defaultFS" = "s3a://my-bucket"
"spark.sql.cbo.enabled": "true",
"spark.sql.adaptive.enabled": "true",
"spark.sql.cbo.planStats.enabled": "true",
"spark.sql.legacy.allowUntypedScalaUDF": "true",
"spark.sql.legacy.allowParameterlessCount": "true",
"spark.sql.statistics.histogram.enabled": "true",
"spark.sql.shuffle.partitions": "10",
"spark.sql.catalog.postgres": "",
"spark.sql.catalog.cassandra": "com.datastax.spark.connector.datasource.CassandraCatalog",
"spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
"spark.sql.catalog.iceberg.type": "hadoop",
"spark.hadoop.fs.s3a.directory.marker.retention": "keep",
"spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled": "true",
"spark.hadoop.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
"spark.hadoop.fs.file.impl": "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
"spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
}
}
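
Note: any extra Spark setting can be added to runtime.config in the same style. For example, the commented-out spark.hadoop.fs.defaultFS entry above could be enabled to point Spark at an S3 bucket; the bucket name below is illustrative:

runtime {
  config {
    "spark.hadoop.fs.defaultFS": "s3a://my-bucket"   # illustrative bucket
  }
}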

json {
json {
}
}

csv {
csv {
}
}

parquet {
parquet {
}
}

# connection type
jdbc {
# connection name
postgres {
# connection details
postgresCustomer {
url = "jdbc:postgresql://localhost:5432/customer"
url = ${?POSTGRES_URL}
user = "postgres"
user = ${?POSTGRES_USERNAME}
user = ${?POSTGRES_USER}
password = "postgres"
password = ${?POSTGRES_PASSWORD}
driver = "org.postgresql.Driver"
}
postgresDvd {
url = "jdbc:postgresql://localhost:5432/dvdrental"
url = ${?POSTGRES_URL}
user = "postgres"
user = ${?POSTGRES_USERNAME}
password = "postgres"
password = ${?POSTGRES_PASSWORD}
driver = "org.postgresql.Driver"
stringtype = "unspecified"
}
mysql {
url = "jdbc:mysql://localhost:3306/customer"
url = ${?MYSQL_URL}
@@ -103,6 +135,7 @@ jdbc {
}
}
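
Note: following the connection type / connection name / connection details comments above, another connection can be declared alongside the existing ones. The connection name and database below are hypothetical:

jdbc {
  postgres {
    postgresAccounts {                                    # hypothetical connection name
      url = "jdbc:postgresql://localhost:5432/accounts"   # hypothetical database
      url = ${?POSTGRES_URL}
      user = "postgres"
      user = ${?POSTGRES_USER}
      password = "postgres"
      password = ${?POSTGRES_PASSWORD}
      driver = "org.postgresql.Driver"
    }
  }
}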


org.apache.spark.sql.cassandra {
cassandra {
spark.cassandra.connection.host = "localhost"
@@ -140,30 +173,10 @@ jms {

kafka {
kafka {
kafka.bootstrap.servers = "localhost:9092"
kafka.bootstrap.servers = "localhost:29092"
kafka.bootstrap.servers = ${?KAFKA_BOOTSTRAP_SERVERS}
}
}

parquet {
parquet {
path = "app/src/test/resources/sample"
path = ${?PARQUET_PATH}
}
}

json {
json {
path = "app/src/test/resources/sample"
path = ${?JSON_PATH}
}
}

csv {
csv {
path = "app/src/test/resources/sample"
path = ${?CSV_PATH}
}
}

datastax-java-driver.advanced.metadata.schema.refreshed-keyspaces = [ "/.*/" ]
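
Note: the DataStax driver setting above refreshes schema metadata for all keyspaces via a regex; the list can also name specific keyspaces if refreshing everything is too broad (names here are hypothetical):

datastax-java-driver.advanced.metadata.schema.refreshed-keyspaces = [ "customer", "account" ]
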
13 changes: 9 additions & 4 deletions app/src/test/resources/application.conf
@@ -32,10 +32,15 @@ runtime{
"spark.sql.legacy.allowUntypedScalaUDF" = "true"
"spark.sql.statistics.histogram.enabled" = "true"
"spark.sql.shuffle.partitions" = "10"
"spark.sql.catalog.postgres" = ""
"spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog"
"spark.hadoop.fs.s3a.directory.marker.retention" = "keep"
"spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true"
"spark.sql.catalog.postgres": "",
"spark.sql.catalog.cassandra": "com.datastax.spark.connector.datasource.CassandraCatalog",
"spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
"spark.sql.catalog.iceberg.type": "hadoop",
"spark.hadoop.fs.s3a.directory.marker.retention": "keep",
"spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled": "true",
"spark.hadoop.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
"spark.hadoop.fs.file.impl": "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
"spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
}
}
