diff --git a/README.md b/README.md
index 53dca5c9..4892fa7d 100644
--- a/README.md
+++ b/README.md
@@ -796,3 +796,49 @@ Either wait for scheduled event to occur or manually execute via:
```shell
fw run evans-pres-repr
```
+
+## duplicate-instance-report
+
+### Instance Duplication Report Workflow (Scheduled)
+
+This workflow emails a separate CSV report for each of the Call Number, ISBN, LCCN, ISSN, and OCLC matches, as well as a full instance duplication CSV report compressed in ZIP format.
+
+The full instance duplication CSV has the following columns. The title and author columns are wrapped in double quotes.
+
+```
+HRID, HRID2, OCLC, ISBN, ISSN, LCCN, TITLE, TITLE2, AUTHOR, AUTHOR2
+```
+
+Requires the following path to exist: `/mnt/workflows/${tenantId}/duplicate-instance-report`.
+
+
+These variables are required when building and running the workflow:
+
+| Variable Name | Allowed Values | Brief Description |
+| ------------------------------ | -------------- | ----------------- |
+| ldp-url | URL | LDP URL. |
+| ldp-user | string | LDP login username. |
+| ldp-password | string | LDP login password. |
+| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. |
+| duplicate-instance-report-to | e-mail address | The e-mail address of the report recipient. |
+
+The scheduled event is for **12:00 AM UTC**, on the first of the month, only in January, April, July, and October.
+
+```shell
+fw config set ldp-url ***
+fw config set ldp-user ***
+fw config set ldp-password ***
+fw config set duplicate-instance-report-from ***
+fw config set duplicate-instance-report-to ***
+```
+
+To build and activate:
+```shell
+fw build duplicate-instance-report
+fw activate duplicate-instance-report
+```
+
+Either wait for scheduled event to occur or manually execute via:
+```shell
+fw run duplicate-instance-report
+```
diff --git a/duplicate-instance-report/nodes/callNumberEmail.json b/duplicate-instance-report/nodes/callNumberEmail.json
new file mode 100644
index 00000000..aaf1c6d6
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
+ "name": "Email Call Number Matches",
+ "description": "Email CSV with instances with matching call numbers",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "callNumberCount",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The instances with matching Call Number report has completed, see the results attached.\n${callNumberCount} instance matches found.",
+ "mailMarkup": "The instances with matching Call Number report has completed, see the results attached.<br/>${callNumberCount} instance matches found.",
+ "mailSubject": "Matching Call Number Instances Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
+ "includeAttachment": "${callNumberCount}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/callNumberMoveToNode.json b/duplicate-instance-report/nodes/callNumberMoveToNode.json
new file mode 100644
index 00000000..e4e7de3b
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberMoveToNode.json
@@ -0,0 +1,12 @@
+{
+ "id": "42c50baa-7d73-48ad-bd9b-53ffd6cd6eda",
+ "name": "CALL NUMBER",
+ "description": "",
+ "deserializeAs": "MoveToNode",
+ "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+ "nodes": [
+ "{{{mod-workflow}}}/databaseQueryTask/7a20c05e-2a58-42f2-9769-42e5b7045343",
+ "{{{mod-workflow}}}/emailTask/b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
+ "{{{mod-workflow}}}/connectTo/53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf"
+ ]
+}
diff --git a/duplicate-instance-report/nodes/callNumberQuery.json b/duplicate-instance-report/nodes/callNumberQuery.json
new file mode 100644
index 00000000..72fab78e
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "7a20c05e-2a58-42f2-9769-42e5b7045343",
+ "name": "Call Number Match Query",
+ "description": "Query to find instances with matching call number",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "callNumberCount",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number !~ '^\\s*$' AND he.call_number_type_id IS NOT NULL AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/connectToLdp.json b/duplicate-instance-report/nodes/connectToLdp.json
new file mode 100644
index 00000000..1c29382b
--- /dev/null
+++ b/duplicate-instance-report/nodes/connectToLdp.json
@@ -0,0 +1,13 @@
+{
+ "id": "11f065f0-f1ea-47ed-abc9-146099572e7b",
+ "name": "Connect LDP",
+ "description": "Connect to the LDP",
+ "deserializeAs": "DatabaseConnectionTask",
+ "inputVariables": [],
+ "outputVariable": {},
+ "designation": "ldp",
+ "url": "{{{ldp-url}}}",
+ "username": "{{{ldp-user}}}",
+ "password": "{{{ldp-password}}}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/disconnectFromLDP.json b/duplicate-instance-report/nodes/disconnectFromLDP.json
new file mode 100644
index 00000000..3487cb0f
--- /dev/null
+++ b/duplicate-instance-report/nodes/disconnectFromLDP.json
@@ -0,0 +1,10 @@
+{
+ "id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
+ "name": "Disconnect LDP",
+ "description": "Disconnect from the LDP",
+ "deserializeAs": "DatabaseDisconnectTask",
+ "inputVariables": [],
+ "outputVariable": {},
+ "designation": "ldp",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/end.json b/duplicate-instance-report/nodes/end.json
new file mode 100644
index 00000000..72be1aa8
--- /dev/null
+++ b/duplicate-instance-report/nodes/end.json
@@ -0,0 +1,6 @@
+{
+ "id": "3c9848b3-f5a4-4753-b916-2b73c88d9409",
+ "name": "End",
+ "description": "End of duplicate instance report workflow",
+ "deserializeAs": "EndEvent"
+}
diff --git a/duplicate-instance-report/nodes/isbnConnectTo.json b/duplicate-instance-report/nodes/isbnConnectTo.json
new file mode 100644
index 00000000..35bcda68
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnConnectTo.json
@@ -0,0 +1,7 @@
+{
+ "id": "c36f8e08-7e43-49b3-923f-ebb8629617c8",
+ "name": "ISBN Complete",
+ "description": "",
+ "deserializeAs": "ConnectTo",
+ "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/isbnEmail.json b/duplicate-instance-report/nodes/isbnEmail.json
new file mode 100644
index 00000000..055536eb
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "66356870-9e8a-406b-ae3d-5fcffef0c556",
+ "name": "Email ISBN Matches",
+ "description": "Email CSV with instances with matching ISBN",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "isbnCount",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The instances with matching ISBN report has completed, see the results attached.\n${isbnCount} instance matches found.",
+ "mailMarkup": "The instances with matching ISBN report has completed, see the results attached.<br/>${isbnCount} instance matches found.",
+ "mailSubject": "Matching ISBN Instances Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
+ "includeAttachment": "${isbnCount}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/isbnMoveToNode.json b/duplicate-instance-report/nodes/isbnMoveToNode.json
new file mode 100644
index 00000000..c57e8c8b
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+ "id": "408a6624-8be4-4bdf-8688-75c23a730187",
+ "name": "ISBN",
+ "description": "",
+ "deserializeAs": "MoveToNode",
+ "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+ "nodes": [
+ "{{{mod-workflow}}}/databaseQueryTask/84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
+ "{{{mod-workflow}}}/emailTask/66356870-9e8a-406b-ae3d-5fcffef0c556",
+ "{{{mod-workflow}}}/connectTo/c36f8e08-7e43-49b3-923f-ebb8629617c8"
+ ]
+}
diff --git a/duplicate-instance-report/nodes/isbnQuery.json b/duplicate-instance-report/nodes/isbnQuery.json
new file mode 100644
index 00000000..d680f787
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
+ "name": "ISBN Match Query",
+ "description": "Query to find instances with matching ISBN",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "isbnCount",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH isbn AS (SELECT ie.instance_hrid, ii.identifier, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.identifier, l.isbn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/issnConnectTo.json b/duplicate-instance-report/nodes/issnConnectTo.json
new file mode 100644
index 00000000..f9bde9ec
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnConnectTo.json
@@ -0,0 +1,7 @@
+{
+ "id": "01c05fa9-6897-4e1c-b0bc-c56b187173e2",
+ "name": "ISSN Complete",
+ "description": "",
+ "deserializeAs": "ConnectTo",
+ "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/issnEmail.json b/duplicate-instance-report/nodes/issnEmail.json
new file mode 100644
index 00000000..bcbb29b9
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
+ "name": "Email ISSN Matches",
+ "description": "Email CSV with instances with matching ISSN",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "issnCount",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The instances with matching ISSN report has completed, see the results attached.\n${issnCount} instance matches found.",
+ "mailMarkup": "The instances with matching ISSN report has completed, see the results attached.<br/>${issnCount} instance matches found.",
+ "mailSubject": "Matching ISSN Instances Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
+ "includeAttachment": "${issnCount}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/issnMoveToNode.json b/duplicate-instance-report/nodes/issnMoveToNode.json
new file mode 100644
index 00000000..55c60d8e
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+ "id": "3b2668b9-58aa-447e-b907-40cee663a3ea",
+ "name": "ISSN",
+ "description": "",
+ "deserializeAs": "MoveToNode",
+ "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+ "nodes": [
+ "{{{mod-workflow}}}/databaseQueryTask/403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
+ "{{{mod-workflow}}}/emailTask/f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
+ "{{{mod-workflow}}}/connectTo/01c05fa9-6897-4e1c-b0bc-c56b187173e2"
+ ]
+}
diff --git a/duplicate-instance-report/nodes/issnQuery.json b/duplicate-instance-report/nodes/issnQuery.json
new file mode 100644
index 00000000..19c3849b
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
+ "name": "ISSN Match Query",
+ "description": "Query to find instances with matching ISSN",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "issnCount",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.issn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/join.json b/duplicate-instance-report/nodes/join.json
new file mode 100644
index 00000000..e9fe5076
--- /dev/null
+++ b/duplicate-instance-report/nodes/join.json
@@ -0,0 +1,7 @@
+{
+ "id": "af9d6b6c-6d59-4735-9a7c-2314a68e0985",
+ "name": "Join",
+ "description": "",
+ "deserializeAs": "ParallelGateway",
+ "nodes": []
+}
diff --git a/duplicate-instance-report/nodes/lccnConnectTo.json b/duplicate-instance-report/nodes/lccnConnectTo.json
new file mode 100644
index 00000000..8af4d1ce
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnConnectTo.json
@@ -0,0 +1,7 @@
+{
+ "id": "35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e",
+ "name": "LCCN Complete",
+ "description": "",
+ "deserializeAs": "ConnectTo",
+ "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/lccnEmail.json b/duplicate-instance-report/nodes/lccnEmail.json
new file mode 100644
index 00000000..2fbc66f4
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "a32eae62-d2f3-4b7b-bbd4-275ba140555e",
+ "name": "Email LCCN Matches",
+ "description": "Email CSV with instances with matching LCCN",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "lccnCount",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The instances with matching LCCN report has completed, see the results attached.\n${lccnCount} instance matches found.",
+ "mailMarkup": "The instances with matching LCCN report has completed, see the results attached.<br/>${lccnCount} instance matches found.",
+ "mailSubject": "Matching LCCN Instances Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
+ "includeAttachment": "${lccnCount}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/lccnMoveToNode.json b/duplicate-instance-report/nodes/lccnMoveToNode.json
new file mode 100644
index 00000000..cae67f9a
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+ "id": "b4e72925-19a4-47ed-8f31-e32cf8905123",
+ "name": "LCCN",
+ "description": "",
+ "deserializeAs": "MoveToNode",
+ "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+ "nodes": [
+ "{{{mod-workflow}}}/databaseQueryTask/9d943d76-ab21-4ca6-9eae-8df6e962c037",
+ "{{{mod-workflow}}}/emailTask/a32eae62-d2f3-4b7b-bbd4-275ba140555e",
+ "{{{mod-workflow}}}/connectTo/35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e"
+ ]
+}
diff --git a/duplicate-instance-report/nodes/lccnQuery.json b/duplicate-instance-report/nodes/lccnQuery.json
new file mode 100644
index 00000000..3aacdf68
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "9d943d76-ab21-4ca6-9eae-8df6e962c037",
+ "name": "LCCN Match Query",
+ "description": "Query to find instances with matching LCCN",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "lccnCount",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.lccn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/oclcConnectTo.json b/duplicate-instance-report/nodes/oclcConnectTo.json
new file mode 100644
index 00000000..17354509
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcConnectTo.json
@@ -0,0 +1,7 @@
+{
+ "id": "53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf",
+ "name": "OCLC Complete",
+ "description": "",
+ "deserializeAs": "ConnectTo",
+ "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/oclcEmail.json b/duplicate-instance-report/nodes/oclcEmail.json
new file mode 100644
index 00000000..57c4fbaf
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "7bc30e09-6b69-40da-9a53-34e75fdcc488",
+ "name": "Email OCLC Matches",
+ "description": "Email CSV with instances with matching OCLC",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "oclcCount",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The instances with matching OCLC report has completed, see the results attached.\n${oclcCount} instance matches found.",
+ "mailMarkup": "The instances with matching OCLC report has completed, see the results attached.<br/>${oclcCount} instance matches found.",
+ "mailSubject": "Matching OCLC Instances Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv",
+ "includeAttachment": "${oclcCount}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/oclcFork.json b/duplicate-instance-report/nodes/oclcFork.json
new file mode 100644
index 00000000..1953ac29
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcFork.json
@@ -0,0 +1,11 @@
+{
+ "id": "aea23b81-06b1-4795-8bab-ea562a854c83",
+ "name": "Fork",
+ "description": "",
+ "deserializeAs": "ParallelGateway",
+ "nodes": [
+ "{{{mod-workflow}}}/databaseQueryTask/724e34bf-c539-411e-bb62-cd15da9ff515",
+ "{{{mod-workflow}}}/emailTask/7bc30e09-6b69-40da-9a53-34e75fdcc488",
+ "{{{mod-workflow}}}/parallelGateway/af9d6b6c-6d59-4735-9a7c-2314a68e0985"
+ ]
+}
diff --git a/duplicate-instance-report/nodes/oclcQuery.json b/duplicate-instance-report/nodes/oclcQuery.json
new file mode 100644
index 00000000..5fc89e61
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "724e34bf-c539-411e-bb62-cd15da9ff515",
+ "name": "OCLC Match Query",
+ "description": "Query to find instances with matching OCLC",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "oclcCount",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.oclc, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportEmail.json b/duplicate-instance-report/nodes/reportEmail.json
new file mode 100644
index 00000000..01188e98
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportEmail.json
@@ -0,0 +1,29 @@
+{
+ "id": "da165c76-b891-4d01-9fbb-f8da1b1a6d12",
+ "name": "Email Instance Duplications Report",
+ "description": "Email the ZIP-compressed CSV of duplicate instance HRIDs and their match criteria",
+ "deserializeAs": "EmailTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ },
+ {
+ "key": "count",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "mailFrom": "{{{duplicate-instance-report-from}}}",
+ "mailTo": "{{{duplicate-instance-report-to}}}",
+ "mailText": "The Instance Duplication Report has completed, see the results attached.\n${count} instance matches found.",
+ "mailMarkup": "The Instance Duplication Report has completed, see the results attached.<br/>${count} instance matches found.",
+ "mailSubject": "Instance Duplication Report - LDP {{{ldp-url}}}",
+ "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip",
+ "includeAttachment": "${count}",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportQuery.json b/duplicate-instance-report/nodes/reportQuery.json
new file mode 100644
index 00000000..a92c06d5
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportQuery.json
@@ -0,0 +1,27 @@
+{
+ "id": "f548dd78-4cf5-4eb9-9b28-e4738470d44b",
+ "name": "Find Instance Duplications",
+ "description": "Query to return matching instances by identifier from LDP",
+ "deserializeAs": "DatabaseQueryTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {
+ "key": "count",
+ "type": "PROCESS",
+ "spin": false
+ },
+ "designation": "ldp",
+ "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv",
+ "resultType": "CSV",
+ "includeHeader": true,
+ "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'oclc' AS match_type FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid), isbn AS (SELECT ie.instance_hrid, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'isbn' AS match_type FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid), lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'lccn' AS match_type FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'issn' AS match_type FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, title, title2, author, author2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID2, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, '\"' || REPLACE(title, '\"', '\"\"') || '\"' AS TITLE, '\"' || REPLACE(title2, '\"', '\"\"') || '\"' AS TITLE2, '\"' || REPLACE(author, '\"', '\"\"') || '\"' AS AUTHOR, '\"' || REPLACE(author2, '\"', '\"\"') || '\"' AS AUTHOR2 FROM all_matches GROUP BY hrid, hrid2, title, title2, author, author2",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportZip.json b/duplicate-instance-report/nodes/reportZip.json
new file mode 100644
index 00000000..b4c51c69
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportZip.json
@@ -0,0 +1,22 @@
+{
+ "id": "e14e6cfc-b069-4f2f-8903-d94714fd2574",
+ "name": "Compress Instance Duplications Report",
+ "description": "Compress instance duplications report as ZIP format",
+ "deserializeAs": "CompressFileTask",
+ "inputVariables": [
+ {
+ "key": "timestamp",
+ "type": "PROCESS"
+ },
+ {
+ "key": "tenantId",
+ "type": "PROCESS"
+ }
+ ],
+ "outputVariable": {},
+ "source": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv",
+ "destination": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip",
+ "format": "ZIP",
+ "container": "NONE",
+ "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/start.json b/duplicate-instance-report/nodes/start.json
new file mode 100644
index 00000000..884e9da4
--- /dev/null
+++ b/duplicate-instance-report/nodes/start.json
@@ -0,0 +1,8 @@
+{
+ "id": "bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
+ "name": "Start",
+ "description": "Start of instance duplication report workflow",
+ "type": "SCHEDULED",
+ "deserializeAs": "StartEvent",
+ "expression": "0 0 0 1 1,4,7,10 ?"
+}
diff --git a/duplicate-instance-report/setup.json b/duplicate-instance-report/setup.json
new file mode 100644
index 00000000..0967ef42
--- /dev/null
+++ b/duplicate-instance-report/setup.json
@@ -0,0 +1 @@
+{}
diff --git a/duplicate-instance-report/workflow.json b/duplicate-instance-report/workflow.json
new file mode 100644
index 00000000..1ca45016
--- /dev/null
+++ b/duplicate-instance-report/workflow.json
@@ -0,0 +1,27 @@
+{
+ "id": "93c90a8c-5b39-4499-a0bc-a24d75444a5c",
+ "name": "Duplicate Instance Report Workflow",
+ "description": "Identify and report instances with matching OCLC, ISBN, ISSN, LCCN, or Call Number",
+ "versionTag": "1.0",
+ "historyTimeToLive": 0,
+ "deploymentId": null,
+ "active": false,
+ "setup": {
+ "asyncBefore": false,
+ "asyncAfter": false
+ },
+ "nodes": [
+ "{{{mod-workflow}}}/startEvent/bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
+ "{{{mod-workflow}}}/databaseConnectionTask/11f065f0-f1ea-47ed-abc9-146099572e7b",
+ "{{{mod-workflow}}}/parallelGateway/aea23b81-06b1-4795-8bab-ea562a854c83",
+ "{{{mod-workflow}}}/moveToNode/408a6624-8be4-4bdf-8688-75c23a730187",
+ "{{{mod-workflow}}}/moveToNode/3b2668b9-58aa-447e-b907-40cee663a3ea",
+ "{{{mod-workflow}}}/moveToNode/b4e72925-19a4-47ed-8f31-e32cf8905123",
+ "{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b",
+ "{{{mod-workflow}}}/compressFileTask/e14e6cfc-b069-4f2f-8903-d94714fd2574",
+ "{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12",
+ "{{{mod-workflow}}}/databaseDisconnectTask/db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
+ "{{{mod-workflow}}}/endEvent/3c9848b3-f5a4-4753-b916-2b73c88d9409"
+ ],
+ "initialContext": {}
+}