diff --git a/README.md b/README.md index 53dca5c9..e1a1852e 100644 --- a/README.md +++ b/README.md @@ -796,3 +796,45 @@ Either wait for scheduled event to occur or manually execute via: ```shell fw run evans-pres-repr ``` + +## duplicate-instance-report + +### Duplicate Instrance Report Workflow (Scheduled) + +This workflow emails a CSV after connects, queries, and disconnects from LDP. + +It has a signle query to build the report. + +Requires following path `/mnt/workflows/${tenantId}/duplicate-instance-report` to save `instance-report-report-${timestamp}.csv` to. + + +These variables are required when building and running the workflow: + +| Variable Name | Allowed Values | Brief Description | +| ------------------------------ | -------------- | ----------------- | +| ldp-url | URL | LDP URL. | +| ldp-user | string | LDP login username. | +| ldp-password | string | LDP login password. | +| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. | +| duplicate-instance-report-to | e-mail address | The e-mail address of the report recipient. | + +The scheduled event is for ... + +```shell +fw config set ldp-url *** +fw config set ldp-user *** +fw config set ldp-password *** +fw config set duplicate-instance-report-from *** +fw config set duplicate-instance-report-to *** +``` + +To build and activate: +```shell +fw build duplicate-instance-report +fw activate duplicate-instance-report +``` + +Either wait for scheduled event to occur or manually execute via: +```shell +fw run duplicate-instance-report +``` diff --git a/duplicate-instance-report/nodes/connectToLdp.json b/duplicate-instance-report/nodes/connectToLdp.json new file mode 100644 index 00000000..63809590 --- /dev/null +++ b/duplicate-instance-report/nodes/connectToLdp.json @@ -0,0 +1,13 @@ +{ + "id": "11f065f0-f1ea-47ed-abc9-146099572e7b", + "name": "Connect LDP", + "description": "Connect to the LDP.", + "deserializeAs": "DatabaseConnectionTask", + "inputVariables": [], + "outputVariable": {}, + "designation": "ldp", + "url": "{{{ldp-url}}}", + "username": "{{{ldp-user}}}", + "password": "{{{ldp-password}}}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/disconnectFromLDP.json b/duplicate-instance-report/nodes/disconnectFromLDP.json new file mode 100644 index 00000000..f1300cad --- /dev/null +++ b/duplicate-instance-report/nodes/disconnectFromLDP.json @@ -0,0 +1,10 @@ +{ + "id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e", + "name": "Disconnect LDP", + "description": "Disconnect from the LDP.", + "deserializeAs": "DatabaseDisconnectTask", + "inputVariables": [], + "outputVariable": {}, + "designation": "ldp", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/email.json b/duplicate-instance-report/nodes/email.json new file mode 100644 index 00000000..ac7a5423 --- /dev/null +++ b/duplicate-instance-report/nodes/email.json @@ -0,0 +1,29 @@ +{ + "id": "da165c76-b891-4d01-9fbb-f8da1b1a6d12", + "name": "Email Duplicate Instance Report", + "description": "Email CSV with duplicate instances hrids and match criterium.", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "count", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The Instance Duplication Report has completed, see the results attached.\n${count} instance matches found", + "mailMarkup": "

The Instance Duplication Report has completed, see the results attached.


${count} instance matches found", + "mailSubject": "Instance Duplication Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-report-report-${timestamp}.csv", + "includeAttachment": "${count}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/end.json b/duplicate-instance-report/nodes/end.json new file mode 100644 index 00000000..72be1aa8 --- /dev/null +++ b/duplicate-instance-report/nodes/end.json @@ -0,0 +1,6 @@ +{ + "id": "3c9848b3-f5a4-4753-b916-2b73c88d9409", + "name": "End", + "description": "End of duplicate instance report workflow", + "deserializeAs": "EndEvent" +} diff --git a/duplicate-instance-report/nodes/query.json b/duplicate-instance-report/nodes/query.json new file mode 100644 index 00000000..82a2f6a2 --- /dev/null +++ b/duplicate-instance-report/nodes/query.json @@ -0,0 +1,27 @@ +{ + "id": "f548dd78-4cf5-4eb9-9b28-e4738470d44b", + "name": "Find Duplicates", + "description": "Query to return matching instances by identifier from LDP", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "count", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-report-report-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH call_number_from_holdings_ext AS (SELECT instance_hrid, TRIM(TRIM(CONCAT_WS(' ', call_number_prefix, call_number, call_number_suffix))) AS call_number FROM folio_reporting.instance_ext i JOIN folio_reporting.holdings_ext h ON i.instance_id = h.instance_id WHERE call_number IS NOT NULL AND call_number <> ''), call_number_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'call_number' AS match_type FROM call_number_from_holdings_ext l JOIN call_number_from_holdings_ext r ON l.call_number = r.call_number AND l.instance_hrid < r.instance_hrid), oclc_from_srs_marctab AS (SELECT instance_hrid, LTRIM(REGEXP_REPLACE(content, '[^0-9]', '', 'g'), '0') AS oclc FROM public.srs_marctab WHERE field = '035' AND ord = 1 AND sf IN ('a', 'z') AND content LIKE '(OCoLC)%'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'oclc' AS match_type FROM oclc_from_srs_marctab l JOIN oclc_from_srs_marctab r ON l.oclc = r.oclc AND l.oclc <> '' AND l.instance_hrid < r.instance_hrid), isbn_from_instance_identifiers AS (SELECT instance_hrid, LEFT(RIGHT(REGEXP_REPLACE(identifier, ' .*', ''), 10), 9) AS isbn FROM folio_reporting.instance_identifiers WHERE identifier_type_name = 'ISBN' AND identifier IS NOT NULL AND identifier <> ''), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'isbn' AS match_type FROM isbn_from_instance_identifiers l JOIN isbn_from_instance_identifiers r ON l.isbn = r.isbn AND l.isbn <> '' AND l.instance_hrid < r.instance_hrid), lccn_from_instance_identifiers AS (SELECT instance_hrid, identifier AS lccn FROM folio_reporting.instance_identifiers WHERE identifier_type_name = 'LCCN' AND identifier IS NOT NULL AND identifier <> ''), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'lccn' AS match_type FROM lccn_from_instance_identifiers l JOIN lccn_from_instance_identifiers r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_from_srs_marctab AS (SELECT instance_hrid::text, content AS issn FROM public.srs_marctab WHERE field = '022' AND ord = 1 AND sf = 'a' AND content IS NOT NULL AND content <> ''), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'issn' AS match_type FROM issn_from_srs_marctab l JOIN issn_from_srs_marctab r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, match_type FROM call_number_matches UNION ALL SELECT hrid, hrid2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID1, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, MAX(CASE WHEN match_type = 'call_number' THEN 'T' END) AS CALL_NUMBER, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN FROM all_matches GROUP BY hrid, hrid2", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/start.json b/duplicate-instance-report/nodes/start.json new file mode 100644 index 00000000..8de9c00b --- /dev/null +++ b/duplicate-instance-report/nodes/start.json @@ -0,0 +1,8 @@ +{ + "id": "bf39c55f-1fd6-41a5-a98a-c28d6d05da9d", + "name": "Start", + "description": "Start of duplicate instance report workflow", + "type": "MESSAGE_CORRELATION", + "deserializeAs": "StartEvent", + "expression": "/events/workflow/duplicate-instance-workflow/start" +} diff --git a/duplicate-instance-report/setup.json b/duplicate-instance-report/setup.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/duplicate-instance-report/setup.json @@ -0,0 +1 @@ +{} diff --git a/duplicate-instance-report/triggers/startTrigger.json b/duplicate-instance-report/triggers/startTrigger.json new file mode 100644 index 00000000..c10ef6a4 --- /dev/null +++ b/duplicate-instance-report/triggers/startTrigger.json @@ -0,0 +1,9 @@ +{ + "id": "4381df47-746c-466a-8beb-75ca91bad9f0", + "name": "Duplicate Instance Report Trigger", + "description": "Start to duplcate instance report workflow via POST request (temporary)", + "type": "MESSAGE_CORRELATE", + "method": "POST", + "deserializeAs": "EventTrigger", + "pathPattern": "/events/workflow/duplicate-instance-workflow/start" +} diff --git a/duplicate-instance-report/workflow.json b/duplicate-instance-report/workflow.json new file mode 100644 index 00000000..b25c0d7f --- /dev/null +++ b/duplicate-instance-report/workflow.json @@ -0,0 +1,22 @@ +{ + "id": "93c90a8c-5b39-4499-a0bc-a24d75444a5c", + "name": "Duplicate Instance Report Workflow", + "description": "Identify and report duplicate instances by OCLC, ISBN, ISSN, LCCN, and Call Number", + "versionTag": "1.0", + "historyTimeToLive": 0, + "deploymentId": null, + "active": false, + "setup": { + "asyncBefore": false, + "asyncAfter": false + }, + "nodes": [ + "{{{mod-workflow}}}/startEvent/bf39c55f-1fd6-41a5-a98a-c28d6d05da9d", + "{{{mod-workflow}}}/databaseConnectionTask/11f065f0-f1ea-47ed-abc9-146099572e7b", + "{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b", + "{{{mod-workflow}}}/databaseDisconnectTask/db806bf5-49b8-4f8a-bf96-fc0258d3c31e", + "{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12", + "{{{mod-workflow}}}/endEvent/3c9848b3-f5a4-4753-b916-2b73c88d9409" + ], + "initialContext": {} +}