From 547993337d60aa0e9d5d47fb3a242ec2128226f8 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Tue, 7 May 2019 16:14:32 +0300 Subject: [PATCH 1/3] feat(etls): add interreg 2014UK16RFOP002 - EUBFR-258 --- config.example.json | 1 + docs/types/README.md | 1 + docs/types/etls/2014tc16i5cb005-csv.md | 7 + docs/types/etls/2014uk16rfop002-xls.md | 136 ++++++++++ scripts/documentation/docs-md.js | 1 + .../etl/2014uk16rfop002/xls/README.md | 16 ++ .../etl/2014uk16rfop002/xls/babel.config.js | 29 +++ .../etl/2014uk16rfop002/xls/package.json | 32 +++ .../etl/2014uk16rfop002/xls/serverless.yml | 123 +++++++++ .../xls/src/events/onParseXLS.js | 122 +++++++++ .../xls/src/lib/improveObjectKeys.js | 16 ++ .../2014uk16rfop002/xls/src/lib/transform.js | 235 ++++++++++++++++++ .../xls/test/stubs/record.json | 12 + .../xls/test/unit/events/onParseXLS.spec.js | 20 ++ .../lib/__snapshots__/transform.spec.js.snap | 94 +++++++ .../xls/test/unit/lib/transform.spec.js | 22 ++ .../etl/2014uk16rfop002/xls/webpack.config.js | 32 +++ tools/eubfr-cli/lib/getServices.js | 1 + 18 files changed, 900 insertions(+) create mode 100644 docs/types/etls/2014uk16rfop002-xls.md create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/README.md create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/babel.config.js create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/package.json create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/serverless.yml create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/src/events/onParseXLS.js create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/src/lib/improveObjectKeys.js create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/test/unit/events/onParseXLS.spec.js create mode 100644 
services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/__snapshots__/transform.spec.js.snap create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/transform.spec.js create mode 100644 services/ingestion/etl/2014uk16rfop002/xls/webpack.config.js diff --git a/config.example.json b/config.example.json index 33b71c089..7b5cdf7f5 100644 --- a/config.example.json +++ b/config.example.json @@ -11,6 +11,7 @@ "2014tc16rfcb047", "2014tc16rfpc001", "2014tc16rftn002", + "2014uk16rfop002", "bulgaria", "cordis", "devco", diff --git a/docs/types/README.md b/docs/types/README.md index 7a5334c0a..1a495415d 100644 --- a/docs/types/README.md +++ b/docs/types/README.md @@ -17,6 +17,7 @@ Here's a list of the transformations made in ETLs around the `Project` model. - [2014tc16rfcb047 - XLS](./etls/2014tc16rfcb047-xls.md) - [2014tc16rfpc001 - XLS](./etls/2014tc16rfpc001-xls.md) - [2014tc16rftn002 - XLS](./etls/2014tc16rftn002-xls.md) +- [2014uk16rfop002 - XLS](./etls/2014uk16rfop002-xls.md) - [bulgaria - XLS](./etls/bulgaria-xls.md) - [CORDIS - CSV](./etls/cordis-csv.md) - [DEVCO - XLS](./etls/devco-xls.md) diff --git a/docs/types/etls/2014tc16i5cb005-csv.md b/docs/types/etls/2014tc16i5cb005-csv.md index 9ef97d5ae..454d87792 100644 --- a/docs/types/etls/2014tc16i5cb005-csv.md +++ b/docs/types/etls/2014tc16i5cb005-csv.md @@ -146,6 +146,13 @@ Input fields taken from the `record` are: Returns **[String][4]** +## numeral + +Numeral understands percentages: + +- "500665.00%" => 5006.650000000001 +- "85.00%" => 0.85 + [1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014tc16i5cb005/csv/test/stubs/record.json [2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014tc16i5cb005/csv/src/lib/transform.js [3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object diff --git a/docs/types/etls/2014uk16rfop002-xls.md b/docs/types/etls/2014uk16rfop002-xls.md new file mode 100644 index 
000000000..6400e8729 --- /dev/null +++ b/docs/types/etls/2014uk16rfop002-xls.md @@ -0,0 +1,136 @@ + + +## 2014uk16rfop002XlsTransform + +Map fields for 2014uk16rfop002 producer, XLS file types + +Example input data: [stub][1] + +Transform function: [implementation details][2] + +### Parameters + +- `record` **[Object][3]** Piece of data to transform before going to harmonized storage. + +Returns **Project** JSON matching the type fields. + +### getBudget + +Preprocess `budget`. + +Input fields taken from the `record` are: + +- `EU (£) (30%)` +- `EU (£)` +- `Total (£)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Budget** + +### getDescription + +Preprocess `description`. + +Input fields taken from the `record` are: + +- `Project No.` +- `GOG (£)` +- `PS (£)` +- `PS (£) (70%)` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getProjectId + +Preprocess `project_id`. + +Input fields taken from the `record` are: + +- `Project Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +### getLocations + +Preprocess `project_locations`. + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[Location][6]>** + +### getThemes + +Preprocess `themes`. + +Input fields taken from the `record` are: + +- `Activity` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<[String][4]>** + +### getThirdParties + +Preprocess `third_parties`. + +Input fields taken from the `record` are: + +- `Sponsor` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[Array][5]<ThirdParty>** + +### getTimeframe + +Preprocess `timeframe`. 
+ +Input fields taken from the `record` are: + +- `Start Date` +- `End Date` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **Timeframe** + +### getTitle + +Preprocess `title`. + +Input fields taken from the `record` are: + +- `Project Name` + +#### Parameters + +- `record` **[Object][3]** The row received from parsed file + +Returns **[String][4]** + +[1]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json +[2]: https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js +[3]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object +[4]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String +[5]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array +[6]: https://developer.mozilla.org/docs/Web/API/Location diff --git a/scripts/documentation/docs-md.js b/scripts/documentation/docs-md.js index b61dec1a7..895c71d11 100755 --- a/scripts/documentation/docs-md.js +++ b/scripts/documentation/docs-md.js @@ -19,6 +19,7 @@ const transforms = [ '2014tc16rfcb047-xls', '2014tc16rfpc001-xls', '2014tc16rftn002-xls', + '2014uk16rfop002-xls', 'bulgaria-xls', 'cordis-csv', 'devco-xls', diff --git a/services/ingestion/etl/2014uk16rfop002/xls/README.md b/services/ingestion/etl/2014uk16rfop002/xls/README.md new file mode 100644 index 000000000..534367bf0 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/README.md @@ -0,0 +1,16 @@ +# 2014uk16rfop002 XLS ETL mapping rules + +Model to compare with is available at: https://ec-europa.github.io/eubfr-data-lake/ + +| Field | Target | +| -------------------- | ----------------- | +| Project No. 
| description | +| Project Name | title | +| Sponsor | third_parties | +| EU (£), EU (£) (30%) | budget.eu_contrib | +| GOG (£) | description | +| PS (£), PS (£) (70%) | description | +| Total (£) | budget.total_cost | +| Activity | themes | +| Start Date | timeframe.from | +| End Date | timeframe.to | diff --git a/services/ingestion/etl/2014uk16rfop002/xls/babel.config.js b/services/ingestion/etl/2014uk16rfop002/xls/babel.config.js new file mode 100644 index 000000000..0397ff2b1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/babel.config.js @@ -0,0 +1,29 @@ +module.exports = { + presets: [ + '@babel/preset-flow', + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + modules: false, + loose: true, + }, + ], + ], + env: { + test: { + presets: [ + [ + '@babel/preset-env', + { + targets: { + node: '8.10', + }, + }, + ], + ], + }, + }, +}; diff --git a/services/ingestion/etl/2014uk16rfop002/xls/package.json b/services/ingestion/etl/2014uk16rfop002/xls/package.json new file mode 100644 index 000000000..e0bc7eb1e --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/package.json @@ -0,0 +1,32 @@ +{ + "private": true, + "name": "@eubfr/ingestion-etl-2014uk16rfop002-xls", + "version": "0.6.0", + "scripts": { + "deploy": "sls deploy -v", + "test:unit": "jest --testPathPattern=unit" + }, + "dependencies": { + "@eubfr/lib": "^0.6.0", + "@eubfr/logger-messenger": "^0.6.0", + "xlsx": "0.14.2" + }, + "devDependencies": { + "@babel/core": "7.4.3", + "@babel/preset-env": "7.4.3", + "@babel/preset-flow": "7.0.0", + "@eubfr/types": "^0.6.0", + "aws-sdk": "2.434.0", + "babel-jest": "24.7.0", + "babel-loader": "8.0.5", + "jest": "24.7.0", + "serverless": "1.40.0", + "serverless-webpack": "5.2.0", + "webpack": "4.29.6" + }, + "jest": { + "transform": { + "^.+\\.js$": "babel-jest" + } + } +} diff --git a/services/ingestion/etl/2014uk16rfop002/xls/serverless.yml b/services/ingestion/etl/2014uk16rfop002/xls/serverless.yml new file mode 100644 index 
000000000..e18390b2f --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/serverless.yml @@ -0,0 +1,123 @@ +service: ingestion-etl-2014uk16rfop002-xls + +plugins: + - serverless-webpack + +custom: + webpack: + webpackConfig: ./webpack.config.js + includeModules: + forceExclude: + - aws-sdk + packager: yarn + eubfrEnvironment: ${opt:eubfr_env, file(../../../../../config.json):eubfr_env, env:EUBFR_ENV, 'dev'} + bucketName: ${file(../../../../../resources/harmonized-storage/serverless.yml):custom.bucketName} + +package: + individually: true + +provider: + name: aws + runtime: nodejs8.10 + timeout: 60 + stage: ${opt:stage, file(../../../../../config.json):stage, env:EUBFR_STAGE, 'dev'} + region: ${opt:region, file(../../../../../config.json):region, env:EUBFR_AWS_REGION, 'eu-central-1'} + deploymentBucket: + name: eubfr-${self:custom.eubfrEnvironment}-deploy + stackTags: + ENV: ${self:custom.eubfrEnvironment} + iamRoleStatements: + - Effect: 'Allow' + Action: + - 's3:PutObject' + Resource: + Fn::Join: + - '' + - - 'arn:aws:s3:::' + - ${self:custom.bucketName} + - '/*' + # Allow queueing messages to the DLQ https://docs.aws.amazon.com/lambda/latest/dg/dlq.html + - Effect: 'Allow' + Action: + - sqs:SendMessage + Resource: '*' + +functions: + parseXls: + handler: src/events/onParseXLS.handler + name: ${self:provider.stage}-${self:service}-parseXls + memorySize: 1024 + environment: + BUCKET: ${self:custom.bucketName} + REGION: ${self:provider.region} + STAGE: ${self:provider.stage} + events: + - sns: + arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop002-xls' + topicName: ${self:provider.stage}-etl-2014uk16rfop002-xls + - sns: + arn: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop002-xlsx' + topicName: ${self:provider.stage}-etl-2014uk16rfop002-xlsx + +resources: + 
Resources: + ParseXlsLambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + DeadLetterConfig: + TargetArn: + Fn::ImportValue: ${self:provider.stage}:ingestion-dead-letter-queue:LambdaFailureQueue + SNSTopic2014uk16rfop002XLS: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop002-xls + DisplayName: 2014uk16rfop002 XLS ETL + SNSTopic2014uk16rfop002XLSX: + Type: AWS::SNS::Topic + Properties: + TopicName: ${self:provider.stage}-etl-2014uk16rfop002-xlsx + DisplayName: 2014uk16rfop002 XLSX ETL + SNSTopic2014uk16rfop002XLSPolicy: + Type: AWS::SNS::TopicPolicy + Properties: + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: Allow-IngestionManager-Publish + Action: + - sns:Publish + Effect: Allow + Resource: + Fn::Join: + - '' + - - 'arn:aws:sns:' + - Ref: 'AWS::Region' + - ':' + - Ref: 'AWS::AccountId' + - ':${self:provider.stage}-etl-2014uk16rfop002-*' + Principal: + AWS: + Fn::Join: + - '' + - - 'arn:aws:sts::' + - Ref: 'AWS::AccountId' + - ':assumed-role/ingestion-manager-${self:provider.stage}-' + - Ref: 'AWS::Region' + - '-lambdaRole/${self:provider.stage}-ingestion-manager-onObjectCreated' + Topics: + - Ref: SNSTopic2014uk16rfop002XLS + - Ref: SNSTopic2014uk16rfop002XLSX diff --git a/services/ingestion/etl/2014uk16rfop002/xls/src/events/onParseXLS.js b/services/ingestion/etl/2014uk16rfop002/xls/src/events/onParseXLS.js new file mode 100644 index 000000000..00d25d90c --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/src/events/onParseXLS.js @@ -0,0 +1,122 @@ +import AWS from 'aws-sdk'; // eslint-disable-line import/no-extraneous-dependencies +import XLSX from 'xlsx'; + +// ETL utilities. 
+import ensureExtensions from '@eubfr/lib/etl/ensureExtensions'; +import extractMessage from '@eubfr/lib/etl/extractMessage'; +import handleError from '@eubfr/lib/etl/handleError'; + +import MessengerFactory from '@eubfr/logger-messenger/src/lib/MessengerFactory'; +import { STATUS } from '@eubfr/logger-messenger/src/lib/status'; + +import transformRecord from '../lib/transform'; +import improveObjectKeys from '../lib/improveObjectKeys'; + +/** + * Parse the XLS/XLSX file named in the SNS event and upload its rows as NDJSON. + */ +export const handler = async (event, context) => { + const { BUCKET, REGION, STAGE } = process.env; + + if (!BUCKET || !REGION || !STAGE) { + throw new Error( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + } + + const snsMessage = extractMessage(event); + const { key } = snsMessage.object; + + if (!ensureExtensions({ file: key, extensions: ['.xls', '.xlsx'] })) { + throw new Error('XLS or XLSX file expected for this ETL.'); + } + + const messenger = MessengerFactory.Create({ context }); + const s3 = new AWS.S3(); + + await messenger.send({ + message: { + computed_key: key, + status_message: 'Start parsing XLS...', + status_code: STATUS.PARSING, + }, + to: ['logs'], + }); + + // Get file + const readStream = s3 + .getObject({ Bucket: snsMessage.bucket.name, Key: key }) + .createReadStream(); + + return new Promise((resolve, reject) => { + // Put data in buffer + const buffers = []; + + readStream.on('data', data => buffers.push(data)); + + readStream.on('error', async e => + handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { error: e, callback: reject } + ) + ); + + // Manage data + readStream.on('end', async () => { + try { // A throw in this async listener would otherwise leave the promise pending. + let dataString = ''; + const records = []; + + // Parse file + const workbook = XLSX.read(Buffer.concat(buffers), { + cellText: false, + cellDates: true, + }); + + workbook.SheetNames.forEach(sheet => { + const rows = XLSX.utils + .sheet_to_json(workbook.Sheets[sheet]) + .map(improveObjectKeys) + 
.filter(record => record['Project Name']) + .filter(record => !String(record.Activity || '').includes('NOT APPROVED')) // Which are approved (rows without Activity are kept). + .filter(record => !String(record.Activity || '').includes('CANCELLED')); // And not cancelled. + + records.push(...rows); + }); + + records.forEach(record => { + dataString += `${JSON.stringify(transformRecord(record))}\n`; + }); + + // Load data + const params = { + Bucket: BUCKET, + Key: `${key}.ndjson`, + Body: dataString, + ContentType: 'application/x-ndjson', + }; + + await s3.upload(params).promise(); + + await messenger.send({ + message: { + computed_key: key, + status_message: + 'XLS parsed successfully. Results will be uploaded to ElasticSearch soon...', + status_code: STATUS.PARSED, + }, + to: ['logs'], + }); + + return resolve('XLS parsed successfully'); + } catch (e) { + return handleError( + { messenger, key, statusCode: STATUS.ERROR }, + { error: e, callback: reject } + ); + } + }); + }); +}; + +export default handler; diff --git a/services/ingestion/etl/2014uk16rfop002/xls/src/lib/improveObjectKeys.js b/services/ingestion/etl/2014uk16rfop002/xls/src/lib/improveObjectKeys.js new file mode 100644 index 000000000..5f4d4c25c --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/src/lib/improveObjectKeys.js @@ -0,0 +1,16 @@ +const improveObjectKeys = o => { + const newObject = {}; + + Object.keys(o).forEach(key => { + const newKey = key + .trim() + .replace(/(\r\n|\n|\r)/gm, '') + .replace(/ {1,}/g, ' '); + + newObject[newKey] = o[key]; + }); + + return newObject; +}; + +module.exports = improveObjectKeys; diff --git a/services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js b/services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js new file mode 100644 index 000000000..cc19a9d6f --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js @@ -0,0 +1,235 @@ +// @flow + +import crypto from 'crypto'; +import type { Project } from '@eubfr/types'; +import sanitizeBudgetItem from '@eubfr/lib/budget/budgetFormatter'; + +/** + * Preprocess `budget`. 
+ * + * Input fields taken from the `record` are: + * - `EU (£) (30%)` + * - `EU (£)` + * - `Total (£)` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Budget} Only `total_cost` and `eu_contrib` are filled from the record. + */ + +const getBudget = record => { + const euContrib = record['EU (£) (30%)'] || record['EU (£)']; // Either header may carry the EU contribution. + + return { + total_cost: sanitizeBudgetItem({ + value: record['Total (£)'], + currency: 'GBP', + raw: record['Total (£)'], + }), + eu_contrib: sanitizeBudgetItem({ + value: euContrib, + currency: 'GBP', + raw: euContrib, + }), + private_fund: sanitizeBudgetItem(), + public_fund: sanitizeBudgetItem(), + other_contrib: sanitizeBudgetItem(), + funding_area: [], + mmf_heading: '', + }; +}; + +/** + * Preprocess `description`: one `label: value` line per available field. + * + * Input fields taken from the `record` are: + * + * - `Project No.` + * - `GOG (£)` + * - `PS (£)` + * - `PS (£) (70%)` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getDescription = record => { + const fields = ['Project No.', 'GOG (£)', 'PS (£)']; + let description = ''; + + fields.forEach(field => { + if (record[field]) { + description += `${field}: ${record[field]} \n`; + } + }); + + if (!description.includes('PS (£)') && record['PS (£) (70%)']) { // Use the 70% column only when `PS (£)` was not already added. + description += `PS (£): ${record['PS (£) (70%)']}`; + } + + return description; +}; + +/** + * Preprocess `project_id`: md5 hex digest of the project name, or '' when it is missing. + * + * Input fields taken from the `record` are: + * - `Project Name` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getProjectId = record => + record['Project Name'] + ? crypto + .createHash('md5') + .update(String(record['Project Name'])) + .digest('hex') + : ''; + +/** + * Preprocess `project_locations`. 
+ * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array<Location>} + */ + +const getLocations = () => [ + { + address: '', + centroid: null, + country_code: 'GB', // Takes into account lib/getCountryCode.js rules directly. + location: null, + nuts: [], + postal_code: '', + region: '', + town: '', + }, +]; + +/** + * Preprocess `themes`: split `Activity` on `,` and `&`; non-text cells are stringified first. + * + * Input fields taken from the `record` are: + * - `Activity` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array<String>} + */ + +const getThemes = record => + record.Activity ? String(record.Activity).trim().split(/\s*(?:,|&)\s*/) : []; + +/** + * Preprocess `third_parties`: a single sponsor entry, or an empty list when `Sponsor` is missing. + * + * Input fields taken from the `record` are: + * - `Sponsor` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Array<ThirdParty>} + */ + +const getThirdParties = record => + record.Sponsor + ? [ + { + address: '', + country: 'GB', + email: '', + name: String(record.Sponsor).trim(), // String() guards against non-text cells, as in getProjectId. + phone: '', + region: '', + role: 'Sponsor', + type: '', + website: '', + }, + ] + : []; + +/** + * Preprocess `timeframe`. + * + * Input fields taken from the `record` are: + * - `Start Date` + * - `End Date` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {Timeframe} + */ + +const getTimeframe = record => { + const from = record['Start Date'] || null; + const to = record['End Date'] || null; + + return { + from, + from_precision: 'day', + to, + to_precision: 'day', + }; +}; + +/** + * Preprocess `title`. + * + * Input fields taken from the `record` are: + * - `Project Name` + * + * @memberof 2014uk16rfop002XlsTransform + * @param {Object} record The row received from parsed file + * @returns {String} + */ + +const getTitle = record => + record['Project Name'] ? 
record['Project Name'].trim() : ''; + +/** + * Map fields for 2014uk16rfop002 producer, XLS file types + * + * Example input data: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json|stub} + * + * Transform function: {@link https://github.com/ec-europa/eubfr-data-lake/blob/master/services/ingestion/etl/2014uk16rfop002/xls/src/lib/transform.js|implementation details} + * @name 2014uk16rfop002XlsTransform + * @param {Object} record Piece of data to transform before going to harmonized storage. + * @returns {Project} JSON matching the type fields. + */ +export default (record: Object): Project | null => { + if (!record) return null; + + // Map the fields + return { + action: '', + budget: getBudget(record), + call_year: '', + description: getDescription(record), + ec_priorities: [], + media: [], + programme_name: '', + project_id: getProjectId(record), + project_locations: getLocations(), + project_website: '', + complete: false, + related_links: [], + reporting_organisation: 'Member states', + results: { + available: '', + result: '', + }, + status: '', + sub_programme_name: '', + success_story: '', + themes: getThemes(record), + third_parties: getThirdParties(record), + timeframe: getTimeframe(record), + title: getTitle(record), + type: [], + }; +}; diff --git a/services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json b/services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json new file mode 100644 index 000000000..c5840bf01 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/test/stubs/record.json @@ -0,0 +1,12 @@ +{ + "Project No.": "14-20/001", + "Project Name": "Solar Panels Phase II", + "Sponsor": "GSLA   ", + "EU (£) (30%)": 118185.15, + "GOG (£)": 275765.35, + "PS (£) (70%)": "-", + "Total (£)": 393950.5, + "Activity": "Equipment, Materials, Installation & Labour", + "Start Date": "2015-06-01T21:00:00.000Z", + "End Date": "2016-04-29T21:00:00.000Z" +} diff 
--git a/services/ingestion/etl/2014uk16rfop002/xls/test/unit/events/onParseXLS.spec.js b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/events/onParseXLS.spec.js new file mode 100644 index 000000000..e5193ece1 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/events/onParseXLS.spec.js @@ -0,0 +1,20 @@ +/** + * @jest-environment node + */ + +import onParseXLS from '../../../src/events/onParseXLS'; + +describe(`Function onParseXLS in "@eubfr/ingestion-etl-2014uk16rfop002-xls"`, () => { + test('The function requires BUCKET, REGION and STAGE environment variables', async () => { + const event = {}; + const context = {}; + + // `.rejects` fails the test when the handler resolves, whereas a bare + // try/catch would let the test pass without running any assertion. + expect.assertions(1); + + await expect(onParseXLS(event, context)).rejects.toThrow( + 'BUCKET, REGION and STAGE environment variables are required!' + ); + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/__snapshots__/transform.spec.js.snap b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/__snapshots__/transform.spec.js.snap new file mode 100644 index 000000000..91a690033 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/__snapshots__/transform.spec.js.snap @@ -0,0 +1,94 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`XLS transformer for 2014uk16rfop002 Produces correct JSON output structure 1`] = ` +Object { + "action": "", + "budget": Object { + "eu_contrib": Object { + "currency": "GBP", + "raw": 118185.15, + "value": 118185.15, + }, + "funding_area": Array [], + "mmf_heading": "", + "other_contrib": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "private_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "public_fund": Object { + "currency": "", + "raw": "", + "value": 0, + }, + "total_cost": Object { + "currency": "GBP", + "raw": 393950.5, + "value": 393950.5, + }, + }, + "call_year": "", + "complete": false, + "description": "Project No.: 14-20/001 +GOG (£): 275765.35 +PS (£): -", 
"ec_priorities": Array [], + "media": Array [], + "programme_name": "", + "project_id": "c6c3755c6e0164343f7ad050a0ce9e77", + "project_locations": Array [ + Object { + "address": "", + "centroid": null, + "country_code": "GB", + "location": null, + "nuts": Array [], + "postal_code": "", + "region": "", + "town": "", + }, + ], + "project_website": "", + "related_links": Array [], + "reporting_organisation": "Member states", + "results": Object { + "available": "", + "result": "", + }, + "status": "", + "sub_programme_name": "", + "success_story": "", + "themes": Array [ + "Equipment", + "Materials", + "Installation", + "Labour", + ], + "third_parties": Array [ + Object { + "address": "", + "country": "GB", + "email": "", + "name": "GSLA", + "phone": "", + "region": "", + "role": "Sponsor", + "type": "", + "website": "", + }, + ], + "timeframe": Object { + "from": "2015-06-01T21:00:00.000Z", + "from_precision": "day", + "to": "2016-04-29T21:00:00.000Z", + "to_precision": "day", + }, + "title": "Solar Panels Phase II", + "type": Array [], +} +`; diff --git a/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/transform.spec.js b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/transform.spec.js new file mode 100644 index 000000000..f76207082 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/test/unit/lib/transform.spec.js @@ -0,0 +1,22 @@ +/** + * @jest-environment node + */ + +import mapper from '../../../src/lib/transform'; +import testRecord from '../../stubs/record.json'; + +describe('XLS transformer for 2014uk16rfop002', () => { + let results = {}; + + beforeAll(() => { + results = mapper(testRecord); + }); + + test('Returns null when record is not provided', () => { + expect(mapper()).toBe(null); + }); + + test('Produces correct JSON output structure', () => { + expect(results).toMatchSnapshot(); + }); +}); diff --git a/services/ingestion/etl/2014uk16rfop002/xls/webpack.config.js 
b/services/ingestion/etl/2014uk16rfop002/xls/webpack.config.js new file mode 100644 index 000000000..30fd8ced7 --- /dev/null +++ b/services/ingestion/etl/2014uk16rfop002/xls/webpack.config.js @@ -0,0 +1,32 @@ +const slsw = require('serverless-webpack'); +const path = require('path'); + +module.exports = { + entry: slsw.lib.entries, + target: 'node', + mode: slsw.lib.webpack.isLocal ? 'development' : 'production', + optimization: { + minimize: process.env.EUBFR_ENV && process.env.EUBFR_ENV === 'prod', + }, + devtool: 'nosources-source-map', + externals: [{ 'aws-sdk': true }], + module: { + rules: [ + { + test: /\.js$/, + use: [ + { + loader: 'babel-loader', + }, + ], + include: __dirname, + exclude: /node_modules/, + }, + ], + }, + output: { + libraryTarget: 'commonjs2', + path: path.join(__dirname, '.webpack'), + filename: '[name].js', + }, +}; diff --git a/tools/eubfr-cli/lib/getServices.js b/tools/eubfr-cli/lib/getServices.js index 05f526421..9217d3e5e 100644 --- a/tools/eubfr-cli/lib/getServices.js +++ b/tools/eubfr-cli/lib/getServices.js @@ -16,6 +16,7 @@ const allServices = [ { service: 'ingestion-etl-2014tc16rfcb047-xls', exportEnv: false }, { service: 'ingestion-etl-2014tc16rfpc001-xls', exportEnv: false }, { service: 'ingestion-etl-2014tc16rftn002-xls', exportEnv: false }, + { service: 'ingestion-etl-2014uk16rfop002-xls', exportEnv: false }, { service: 'ingestion-etl-bulgaria-xls', exportEnv: false }, { service: 'ingestion-etl-cordis-csv', exportEnv: false }, { service: 'ingestion-etl-devco-xls', exportEnv: false }, From d01907289a080ff74e17b775cd509a8dfe695b85 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Mon, 20 May 2019 16:12:16 +0300 Subject: [PATCH 2/3] Update --- services/ingestion/etl/2014uk16rfop002/xls/package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/ingestion/etl/2014uk16rfop002/xls/package.json b/services/ingestion/etl/2014uk16rfop002/xls/package.json index e0bc7eb1e..75031945f 100644 --- 
a/services/ingestion/etl/2014uk16rfop002/xls/package.json +++ b/services/ingestion/etl/2014uk16rfop002/xls/package.json @@ -7,15 +7,15 @@ "test:unit": "jest --testPathPattern=unit" }, "dependencies": { - "@eubfr/lib": "^0.6.0", - "@eubfr/logger-messenger": "^0.6.0", + "@eubfr/lib": "^0.7.0", + "@eubfr/logger-messenger": "^0.7.0", "xlsx": "0.14.2" }, "devDependencies": { "@babel/core": "7.4.3", "@babel/preset-env": "7.4.3", "@babel/preset-flow": "7.0.0", - "@eubfr/types": "^0.6.0", + "@eubfr/types": "^0.7.0", "aws-sdk": "2.434.0", "babel-jest": "24.7.0", "babel-loader": "8.0.5", From 86fb1c7c2cef395f9a84fd4f54ee730b3f94d540 Mon Sep 17 00:00:00 2001 From: Kalin Chernev Date: Wed, 22 May 2019 14:33:56 +0300 Subject: [PATCH 3/3] Version bump --- services/ingestion/etl/2014uk16rfop002/xls/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/ingestion/etl/2014uk16rfop002/xls/package.json b/services/ingestion/etl/2014uk16rfop002/xls/package.json index 75031945f..c5aa3c6b7 100644 --- a/services/ingestion/etl/2014uk16rfop002/xls/package.json +++ b/services/ingestion/etl/2014uk16rfop002/xls/package.json @@ -1,7 +1,7 @@ { "private": true, "name": "@eubfr/ingestion-etl-2014uk16rfop002-xls", - "version": "0.6.0", + "version": "0.7.0", "scripts": { "deploy": "sls deploy -v", "test:unit": "jest --testPathPattern=unit"