Skip to content

Commit

Permalink
Add support for UTF8 to XDMoD
Browse files Browse the repository at this point in the history
  • Loading branch information
jpwhite4 committed Dec 16, 2024
1 parent efd0d95 commit 6c86451
Show file tree
Hide file tree
Showing 31 changed files with 463 additions and 35 deletions.
5 changes: 5 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ jobs:
PATH=/usr/local/node-v10.24.1-linux-x64/bin:$PATH ./tests/ci/samlSetup.sh
PATH=/usr/local/node-v10.24.1-linux-x64/bin:$PATH ./tests/ui/runtests.sh --headless --log-junit ~/phpunit --sso
./vendor/phpunit/phpunit/phpunit -c ./tests/integration/phpunit.xml.dist --testsuite sso --log-junit ~/phpunit/xdmod-sso-integration.xml
- run:
name: 'Run Post Install / Upgrade Tests'
command: |
./tests/post/bootstrap.sh
./tests/post/runtests.sh
- run:
name: Ensure that no unexpected Apache errors were generated ( We expect PHP Deprecated )
command: >
Expand Down
4 changes: 2 additions & 2 deletions classes/DB/FilterListBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ private function buildDimensionLists(iQuery $realmQuery, iGroupBy $groupBy, iRea
"CREATE TABLE `{$targetSchema}`.`{$mainTableName}` (
`{$dimensionId}` {$dimensionColumnType} NOT NULL,
PRIMARY KEY (`{$dimensionId}`)
);"
) CHARACTER SET utf8 COLLATE utf8_unicode_ci"
);
$dimensionQuery = $this->createDimensionQuery($realmQuery, $groupBy);

Expand Down Expand Up @@ -222,7 +222,7 @@ private function buildDimensionLists(iQuery $realmQuery, iGroupBy $groupBy, iRea
`{$secondDimensionId}` {$secondDimensionColumnType} NOT NULL,
PRIMARY KEY (`{$firstDimensionId}`, `{$secondDimensionId}`),
INDEX `idx_second_dimension` (`{$secondDimensionId}` ASC)
)"
) CHARACTER SET utf8 COLLATE utf8_unicode_ci"
);

$firstSelectTables = $firstDimensionQuery->getSelectTables();
Expand Down
2 changes: 1 addition & 1 deletion classes/DB/PDODBMultiIngestor.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class PDODBMultiIngestor implements Ingestor
* If set to null then the mysql defaults will be used.
* This is only used in LOAD INFILE mode.
*/
protected $_character_set = null;
protected $_character_set = 'utf8';

/**
* Helper instance for destination database.
Expand Down
18 changes: 11 additions & 7 deletions classes/ETL/DbModel/Table.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ class Table extends SchemaEntity implements iEntity, iDiscoverableEntity, iAlter
'engine' => null,

// Optional table default character set
'charset' => null,
'charset' => 'utf8',

// Optional table collation
'collation' => null,
'collation' => 'utf8_unicode_ci',

// Associative array where the keys are column names and the values are Column objects
'columns' => array(),
Expand Down Expand Up @@ -837,11 +837,15 @@ public function getAlterSql($destination, $includeSchema = true)
}

if ( null !== $destination->charset && $this->charset != $destination->charset ) {
$alterList[] = sprintf("CHARSET = %s", $destination->charset);
}

if ( null !== $destination->collation && $this->collation != $destination->collation ) {
$alterList[] = sprintf("COLLATE = %s", $destination->collation);
$collation_definition = '';
if ($destination->collation !== null) {
$collation_definition = ' COLLATE ' . $destination->collation;
}
$alterList[] = sprintf("CONVERT TO CHARACTER SET %s %s", $destination->charset, $collation_definition);
} else {
if ( null !== $destination->collation && $this->collation != $destination->collation ) {
$alterList[] = sprintf("COLLATE = %s", $destination->collation);
}
}

if ( $this->comment != $destination->comment ) {
Expand Down
2 changes: 2 additions & 0 deletions classes/ETL/Ingestor/pdoIngestor.php
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ private function multiDatabaseIngest()

if ( $this->options->force_load_data_infile_replace_into ) {
$loadStatement = "LOAD DATA LOCAL INFILE '$infileName' replace into table $qualifiedDestTableName "
. "CHARACTER SET utf8 "
. "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator))
. " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure))
. " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar))
Expand All @@ -639,6 +640,7 @@ function ($s) {
$loadStatement = "CREATE TABLE $tmpTable LIKE $qualifiedDestTableName; "
. "ALTER TABLE $tmpTable DISABLE KEYS; "
. "LOAD DATA LOCAL INFILE '$infileName' INTO TABLE $tmpTable "
. "CHARACTER SET utf8 "
. "FIELDS TERMINATED BY " . sprintf("0x%02x", ord($this->fieldSeparator))
. " OPTIONALLY ENCLOSED BY " . sprintf("0x%02x", ord($this->stringEnclosure))
. " ESCAPED BY " . sprintf("0x%02x", ord($this->escapeChar))
Expand Down
3 changes: 0 additions & 3 deletions classes/OpenXdmod/Shredder/Slurm.php
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ public function shredLine($line)
return;
}

// Convert job name encoding.
$job['job_name'] = mb_convert_encoding($job['job_name'], 'ISO-8859-1', 'UTF-8');

// Convert datetime strings into unix timestamps.
$dateKeys = array(
'submit_time',
Expand Down
146 changes: 146 additions & 0 deletions configuration/etl/etl.d/xdmod-migration-11_0_0-11_5_0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{
"module": "xdmod",
"defaults": {
"global": {
"class": "ManageTables",
"namespace": "ETL\\Maintenance",
"options_class": "MaintenanceOptions",
"endpoints": {
"source": {
"type": "mysql",
"name": "XDMoD Database",
"config": "database",
"schema": "moddb",
"create_schema_if_not_exists": true
},
"destination": {
"type": "mysql",
"name": "XDMoD Database",
"config": "database",
"schema": "moddb",
"create_schema_if_not_exists": true
}
}
}
},
"migration-11_0_0-11_5_0": [
{
"name": "update-moddb-tables",
"description": "Update managed tables in moddb",
"definition_file_list": [
"xdb/account-requests.json",
"xdb/api-keys.json",
"xdb/chart-pool.json",
"xdb/colors.json",
"xdb/exception-email-addresses.json",
"xdb/report-charts.json",
"xdb/report-template-charts.json",
"xdb/report-templates.json",
"xdb/reports.json",
"xdb/restx509.json",
"xdb/session-manager.json",
"xdb/users.json",
"xdb/user-profiles.json",
"xdb/user-types.json",
"xdb/version-check.json",
"xdb/batch-export-requests.json"
]
},
{
"name": "update-shredder-tables",
"description": "Update managed tables in mod_shredder",
"endpoints": {
"destination": {
"type": "mysql",
"name": "Shredder/Staging Database",
"config": "database",
"schema": "mod_shredder"
}
},
"definition_file_list": [
"jobs/shredder/job-lsf.json",
"jobs/shredder/job-pbs.json",
"jobs/shredder/job-sge.json",
"jobs/shredder/job.json",
"storage/staging/mountpoint.json",
"storage/staging/usage.json"
]
},
{
"name": "update-log-tables",
"description": "Update managed tables in mod_logger",
"endpoints": {
"destination": {
"type": "mysql",
"name": "Logging Database",
"config": "database",
"schema": "mod_logger"
}
},
"definition_file_list": [
"logger/log_id_seq.json",
"logger/log_level.json",
"logger/log_table.json"
]
},
{
"name": "update-cloud-tables",
"description": "Update managed tables in modw_cloud",
"endpoints": {
"destination": {
"type": "mysql",
"name": "Cloud Database",
"config": "database",
"schema": "modw_cloud"
}
},
"definition_file_list": [
"cloud_common/domains.json"
]
},
{
"name": "update-modw-tables",
"description": "Update managed tables in modw",
"endpoints": {
"destination": {
"type": "mysql",
"name": "Datawarehouse database",
"config": "database",
"schema": "modw"
}
},
"definition_file_list": [
"jobs/job_request_info.json",
"jobs/job_task_countable.json",
"jobs/meta_job.json",
"jobs/meta_job_record.json",
"jobs/xdw/processor-buckets.json",
"jobs/xdw/error-descriptions.json",
"jobs/job_record_type.json",
"jobs/job_task_type.json",
"jobs/submission_venue.json",
"jobs/countable_type.json",
"jobs/xdw/job-times.json",
"jobs/xdw/gpu-buckets.json",
"common/staging/resource-type.json",
"jobs/xdw/job-wait-times.json",
"jobs/xdw/federation-instances.json",
"jobs/unit.json",
"jobs/xdw/days.json",
"jobs/xdw/months.json",
"jobs/xdw/quarters.json",
"jobs/xdw/years.json"
]
},
{
"name": "update-nonmanaged-tables",
"description": "Update tables in mod_shredder that are not managed by etlv2",
"namespace": "ETL\\Maintenance",
"class": "ExecuteSql",
"options_class": "MaintenanceOptions",
"sql_file_list": [
"migrations/11.0.0-11.5.0/mod_shredder/shredded_job_slurm.sql"
]
}
]
}
2 changes: 1 addition & 1 deletion configuration/etl/etl_sql.d/jobs/shredder/job-slurm.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ CREATE TABLE ${DESTINATION_SCHEMA}.`shredded_job_slurm` (
`node_list` mediumtext NOT NULL,
PRIMARY KEY (`shredded_job_slurm_id`),
UNIQUE KEY `job` (`cluster_name`(20),`job_id`,`job_array_index`,`submit_time`,`end_time`)
) ENGINE=InnoDB//
) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_unicode_ci//
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE `mod_shredder`.`shredded_job_slurm` CONVERT TO CHARACTER SET utf8 COLLATE utf8_unicode_ci//
2 changes: 1 addition & 1 deletion configuration/etl/etl_tables.d/cloud_common/domains.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"table_definition": {
"name": "domains",
"engine": "InnoDB",
"charset": "utf8",
"comment": "Which domains are currently being tracked by the Cloud realm",
"columns": [
{
Expand All @@ -23,6 +22,7 @@
"name": "name",
"type": "varchar(64)",
"nullable": false,
"collation": "utf8_unicode_ci",
"comment": "The human readable internal name, as received from the resource."
}
],
Expand Down
4 changes: 2 additions & 2 deletions configuration/etl/etl_tables.d/logger/log_level.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"schema": "mod_logger",
"name": "log_level",
"engine": "InnoDB",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"name": "log_level_id",
Expand Down
4 changes: 2 additions & 2 deletions configuration/etl/etl_tables.d/logger/log_table.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"schema": "mod_logger",
"name": "log_table",
"engine": "InnoDB",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"name": "id",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"#": "New column at start of list",
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,22 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"#": "This was the 1st column",
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
"table_definition": {
"name": "modify_table_test",
"engine": "MyISAM",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"columns": [
{
"name": "resource",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
"name": "new_column_1",
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"nullable": true
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"comment": "",
"engine": "myisam",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"columns": [
{
"type": "varchar(40)",
"charset": "latin1",
"collation": "latin1_swedish_ci",
"charset": "utf8",
"collation": "utf8_unicode_ci",
"nullable": true,
"default": null,
"extra": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ALTER TABLE `test_db_model`
CHARSET = utf8mb4,
COLLATE = utf8mb4_general_ci,
CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
DROP INDEX `fk_instance`,
ADD INDEX `fk_instance` USING BTREE (`instance_id`, `inferred`);
ALTER TABLE `test_db_model`
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ALTER TABLE `test_db_model`
CHARSET = utf8mb4,
COLLATE = utf8mb4_general_ci,
CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci,
ADD COLUMN `new_column` boolean NOT NULL DEFAULT 0 AFTER `inferred`,
ADD COLUMN `new_column2` char(64) CHARSET utf8mb4 COLLATE utf8mb4_general_ci NULL AFTER `new_column`,
ADD INDEX `index_new_column` (`new_column`),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ CREATE TABLE IF NOT EXISTS `resourceallocationfact_by_quarter` (
INDEX `index_year` (`year`),
INDEX `index_organization_id` (`organization_id`),
INDEX `index_resource_id` (`resource_id`)
) ENGINE = myisam COMMENT = 'Resource Allocation facts aggregated by quarter.';
) ENGINE = myisam CHARSET = utf8 COLLATE = utf8_unicode_ci COMMENT = 'Resource Allocation facts aggregated by quarter.';
Loading

0 comments on commit 6c86451

Please sign in to comment.