From 445e1845be95644a80c12172d391f250d671865f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lennart=20M=C3=BChlenmeier?= Date: Thu, 8 Feb 2024 17:16:00 +0100 Subject: [PATCH 1/2] Add eu_stateaid This draft adds a new dataset covering EU State Aid ("Beihilfe" in German), as downloaded from their search interface via click work. Please note, I'd need advice on the mapping (i.e. transform stage). The original CSV headers look like this: Country,Another Beneficiary Member State,Aid Measure Title,Aid Measure Title [EN],SA.Number,Ref-no.,National ID,Name of the beneficiary,Name of the beneficiary [EN],Beneficiary Type,Region,Sector (NACE),Aid Instrument,Aid Instrument [EN],Objectives of the Aid,Objectives of the Aid [EN],"Nominal Amount, expressed as full amount","Aid element, expressed as full amount",Currency,Date of granting,Granting Authority Name,Granting Authority Name [EN],Published Date,Entrusted Entity,Financial Intermediaries,Third country outside of the EU The dataset ranges from 2016 until today. Apparently it's updated frequently; thus a scraper would be a worthwhile addition at some point. --- datasets/eu_stateaid/config.yml | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 datasets/eu_stateaid/config.yml diff --git a/datasets/eu_stateaid/config.yml b/datasets/eu_stateaid/config.yml new file mode 100644 index 0000000..d018528 --- /dev/null +++ b/datasets/eu_stateaid/config.yml @@ -0,0 +1,40 @@ +name: eu-stateaid +title: State Aid Transparency Public Search +publisher: + name: European Commission + url: https://webgate.ec.europa.eu/competition/transparency/public +extract: + sources: + - uri: ./20240208-eu-stateaid.csv + pandas: + read: + options: + sep: "," + encoding: latin + dtype: str + skiprows: 1 +transform: + queries: + - entities: + org: + schema: Payment + key_literal: eu-stateaids + keys: + - "Ref-no." + properties: + transactionNumber: + column: "Ref-no." 
+ summary: + columns: + - "Aid Measure Title" + - "Aid Measure Title [EN]" + beneficiary: + columns: + - "Name of the beneficiary" + - "Name of the beneficiary [EN]" + - "Nationl ID" + - "Sector (NACE)" + currency: + column: "Currency" + amount: + column: "Nominal Amount, expressed as full amount" From bebd293ed2e66133ab04acf54c28c0a0b708336b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20W=C3=B6rpel?= Date: Tue, 4 Jun 2024 16:00:57 +0200 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=92=9A=20eu=5Fstateaid?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- datasets/eu_stateaid/config.yml | 78 +++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/datasets/eu_stateaid/config.yml b/datasets/eu_stateaid/config.yml index d018528..ebba383 100644 --- a/datasets/eu_stateaid/config.yml +++ b/datasets/eu_stateaid/config.yml @@ -5,36 +5,86 @@ publisher: url: https://webgate.ec.europa.eu/competition/transparency/public extract: sources: - - uri: ./20240208-eu-stateaid.csv + - uri: ./20240208-eu-stateaid.csv.gz + stream: false pandas: read: options: - sep: "," - encoding: latin + compression: gzip dtype: str - skiprows: 1 + operations: + - handler: DataFrame.rename + options: + columns: + "20240208-eu-stateaid.csv": Country transform: queries: - entities: - org: + payer: + schema: PublicBody + key_literal: eu-stateaid-authority + keys: + - Country + - Granting Authority Name + - Granting Authority Name [EN] + properties: + country: + column: Country + name: + columns: + - Granting Authority Name + - Granting Authority Name [EN] + beneficiary: + schema: LegalEntity + key_literal: eu-stateaid-beneficiary + keys: + - "National ID" + properties: + name: + columns: + - "Name of the beneficiary" + - "Name of the beneficiary [EN]" + sector: + column: "Sector (NACE)" + country: + column: "Another Beneficiary Member State" + payment: schema: Payment - key_literal: eu-stateaids + key_literal: 
eu-stateaid-payment keys: - "Ref-no." properties: transactionNumber: column: "Ref-no." + programme: + columns: + - "Aid Instrument" + - "Aid Instrument [EN]" + purpose: + columns: + - "Objectives of the Aid" + - "Objectives of the Aid [EN]" summary: columns: - - "Aid Measure Title" - - "Aid Measure Title [EN]" + - "Aid Measure Title" + - "Aid Measure Title [EN]" + date: + column: "Date of granting" + format: "%d/%m/%Y" beneficiary: - columns: - - "Name of the beneficiary" - - "Name of the beneficiary [EN]" - - "Nationl ID" - - "Sector (NACE)" + entity: beneficiary + payer: + entity: payer currency: column: "Currency" amount: - column: "Nominal Amount, expressed as full amount" + columns: + - "Nominal Amount, expressed as full amount" + - "Aid element, expressed as full amount" + +load: + fragments_uri: postgresql:///ftm + +aggregate: false +# aggregate: +# handler: investigraph.logic.aggregate:in_db