diff --git a/data-preparation/data/_raw.7z b/data-preparation/data/_raw.7z deleted file mode 100644 index ae854f4..0000000 Binary files a/data-preparation/data/_raw.7z and /dev/null differ diff --git a/data-preparation/notebooks/processed/.gitkeep b/data-preparation/notebooks/processed/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/data-preparation/notebooks/raw/.gitkeep b/data-preparation/notebooks/raw/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/data-preparation/src/sdp_data/sources/_raw_template.py b/data-preparation/src/_raw_template.py similarity index 100% rename from data-preparation/src/sdp_data/sources/_raw_template.py rename to data-preparation/src/_raw_template.py diff --git a/data-preparation/src/sdp_data/github_secret.py b/data-preparation/src/github_secret.py similarity index 100% rename from data-preparation/src/sdp_data/github_secret.py rename to data-preparation/src/github_secret.py diff --git a/data-preparation/src/sdp_data/main.py b/data-preparation/src/main.py similarity index 100% rename from data-preparation/src/sdp_data/main.py rename to data-preparation/src/main.py diff --git a/data-preparation/src/sdp_data/main_transformation.py b/data-preparation/src/main_transformation.py similarity index 91% rename from data-preparation/src/sdp_data/main_transformation.py rename to data-preparation/src/main_transformation.py index 47032eb..31bac06 100644 --- a/data-preparation/src/sdp_data/main_transformation.py +++ b/data-preparation/src/main_transformation.py @@ -1,23 +1,40 @@ import sys + sys.path.append("../../") -from src.sdp_data.transformation.demographic.population import GapMinderPerZoneAndCountryProcessor, PopulationPerZoneAndCountryProcessor -from src.sdp_data.transformation.demographic.population import StatisticsPerCapitaJoiner -from src.sdp_data.transformation.co2_consumption_based_accounting import EoraCo2TradePerZoneAndCountryProcessor -from src.sdp_data.transformation.footprint_vs_territorial import FootprintVsTerrotorialProcessor -from src.sdp_data.transformation.demographic.worldbank_scrap import WorldBankScrapper -from src.sdp_data.transformation.demographic.gdp import GdpMaddissonPerZoneAndCountryProcessor, GdpWorldBankPerZoneAndCountryProcessor -from src.sdp_data.transformation.eia import EiaConsumptionGasBySectorProcessor, EiaConsumptionOilPerProductProcessor, EiaFinalEnergyConsumptionProcessor, EiaFinalEnergyPerSectorPerEnergyProcessor, EiaElectricityGenerationByEnergyProcessor, EiaConsumptionOilsPerSectorProcessor, EiaFinalEnergyConsumptionPerSectorProcessor -from src.sdp_data.utils.format import StatisticsDataframeFormatter -from src.sdp_data.transformation.ghg.pik import PikCleaner -from src.sdp_data.transformation.ghg.edgar import EdgarCleaner -from src.sdp_data.transformation.ghg.ghg import GhgPikEdgarCombinator, PikUnfcccAnnexesCombinator, EdgarUnfcccAnnexesCombinator, GhgMultiSourcesCombinator -from src.sdp_data.transformation.ghg.unfcc import UnfcccAnnexesCleaner, UnfccProcessor -from src.sdp_data.transformation.ghg.fao import FaoDataProcessor -from src.sdp_data.transformation.ghg.cait import CaitProcessor -import pandas as pd import os + +import pandas as pd import requests from pandas import json_normalize +from src.source_aggregations.co2_consumption_based_accounting import \ + EoraCo2TradePerZoneAndCountryProcessor +from src.source_aggregations.demographic.gdp import ( + GdpMaddissonPerZoneAndCountryProcessor, + GdpWorldBankPerZoneAndCountryProcessor) +from src.source_aggregations.footprint_vs_territorial import \ + FootprintVsTerrotorialProcessor +from src.source_aggregations.ghg.cait import CaitProcessor +from src.source_aggregations.ghg.edgar import EdgarCleaner +from src.source_aggregations.ghg.fao import FaoDataProcessor +from src.source_aggregations.ghg.ghg import (EdgarUnfcccAnnexesCombinator, + GhgMultiSourcesCombinator, + GhgPikEdgarCombinator, + PikUnfcccAnnexesCombinator) +from src.source_aggregations.ghg.pik import PikCleaner +from src.source_aggregations.ghg.unfcc import (UnfcccAnnexesCleaner, + UnfccProcessor) +from src.sources.eia.eia import (EiaConsumptionGasBySectorProcessor, + EiaConsumptionOilPerProductProcessor, + EiaConsumptionOilsPerSectorProcessor, + EiaElectricityGenerationByEnergyProcessor, + EiaFinalEnergyConsumptionPerSectorProcessor, + EiaFinalEnergyConsumptionProcessor, + EiaFinalEnergyPerSectorPerEnergyProcessor) +from src.sources.gapminder.population import ( + GapMinderPerZoneAndCountryProcessor, PopulationPerZoneAndCountryProcessor, + StatisticsPerCapitaJoiner) +from src.sources.worldbank.worldbank_scrap import WorldBankScrapper +from src.utils.format import StatisticsDataframeFormatter RAW_DATA_DIR = os.path.join(os.path.dirname(__file__), "../../results/raw_new_data") RESULTS_DIR = os.path.join(os.path.dirname(__file__), "../../results/new_prod_data") @@ -39,7 +56,7 @@ def process_population_data(self, df_country): df_population_raw = WorldBankScrapper().run("population") df_population = PopulationPerZoneAndCountryProcessor().run(df_population_raw, df_country) df_population.to_csv(f"{RESULTS_DIR}/DEMOGRAPHIC_POPULATION_WORLDBANK_prod.csv", index=False) - + # update GapMinder data (source GapMinder) df_population_gapmidner_raw = pd.read_excel(f"{RAW_DATA_DIR}/population/GM-Population - Dataset - v7.xlsx", sheet_name="data-pop-gmv6-in-columns") df_gapminder = GapMinderPerZoneAndCountryProcessor().run(df_population_gapmidner_raw, df_country) @@ -56,9 +73,9 @@ def process_footprint_vs_territorial_data(self, df_country, df_population): df_footprint_vs_territorial_per_capita = StatisticsPerCapitaJoiner().run_footprint_vs_territorial_per_capita(df_footprint_vs_territorial, df_population) df_footprint_vs_territorial_per_capita.to_csv(f"{RESULTS_DIR}/CO2_CBA_PER_CAPITA_eora_cba_zones_per_capita_prod.csv", index=False) - + def process_iea_data(self, df_country): - + # gas products df_gas_cons_by_sector = EiaConsumptionGasBySectorProcessor().prepare_data(df_country) df_gas_cons_by_sector.to_csv(f"{RESULTS_DIR}/FINAL_CONS_GAS_BY_SECTOR_prod.csv", index=False) @@ -66,13 +83,13 @@ def process_iea_data(self, df_country): df_original = StatisticsDataframeFormatter.select_and_sort_values(df_original, "final_energy", round_statistics=4) df_original.to_csv(f"{CURRENT_PROD_DATA}/FINAL_CONS_GAS_BY_SECTOR_prod.csv", index=False) - # oil products + # oil products df_oil_cons_per_product = EiaConsumptionOilPerProductProcessor().prepare_data(df_country) df_oil_cons_per_product.to_csv(f"{RESULTS_DIR}/FINAL_CONS_OIL_BY_PRODUCT_prod.csv", index=False) df_original = pd.read_csv(f"{CURRENT_DATA_DIR}/final_cons_oil_products_by_product.csv", sep=',') df_original = StatisticsDataframeFormatter.select_and_sort_values(df_original, "final_energy", round_statistics=4) df_original.to_csv(f"{CURRENT_PROD_DATA}/FINAL_CONS_OIL_BY_PRODUCT_prod.csv", index=False) - + df_oil_cons_per_sector = EiaConsumptionOilsPerSectorProcessor().prepare_data(df_country) df_oil_cons_per_sector.to_csv(f"{RESULTS_DIR}/FINAL_CONS_OIL_BY_SECTOR_prod.csv", index=False) df_original = pd.read_csv(f"{CURRENT_DATA_DIR}/final_cons_oil_products_by_sector_prod.csv", sep=',') @@ -164,7 +181,7 @@ def process_ghg_data(self, df_country): df_unfccc_annex_1 = pd.read_excel(os.path.join(os.path.dirname(__file__), "../../data/thibaud/ghg/" + "unfccc_annex1.xlsx")) df_unfccc_annex_2 = pd.read_excel(os.path.join(os.path.dirname(__file__), "../../data/thibaud/ghg/" + "unfccc_annex2.xlsx")) df_unfccc_annex_clean = UnfcccAnnexesCleaner().run(df_unfccc_annex_1, df_unfccc_annex_2) - + # combine PIK and UNFCCC annexes data df_pik_unfccc_annexes = PikUnfcccAnnexesCombinator().run(df_pik_cleaned, df_unfccc_annex_clean) df_pik_unfccc_annexes.to_csv(f"{RESULTS_DIR}/GHG_PIK_UNFCCC_prod.csv", index=False) diff --git a/data-preparation/src/sdp_data/raw.py b/data-preparation/src/raw.py similarity index 100% rename from data-preparation/src/sdp_data/raw.py rename to data-preparation/src/raw.py diff --git a/data-preparation/src/sdp_data/data/_raw.7z b/data-preparation/src/sdp_data/data/_raw.7z deleted file mode 100644 index e44bf07..0000000 Binary files a/data-preparation/src/sdp_data/data/_raw.7z and /dev/null differ diff --git a/data-preparation/src/sdp_data/sources/raw_owid_TODO.py b/data-preparation/src/sdp_data/sources/raw_owid_TODO.py deleted file mode 100644 index 2c4cd6d..0000000 --- a/data-preparation/src/sdp_data/sources/raw_owid_TODO.py +++ /dev/null @@ -1,10 +0,0 @@ - -# OWID - -import raw - - -def main(raw, test): - pass - - \ No newline at end of file diff --git a/data-preparation/src/sdp_data/utils/__init__.py b/data-preparation/src/sdp_data/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/data-preparation/src/countries/__init__.py b/data-preparation/src/source_aggregations/__init__.py similarity index 100% rename from data-preparation/src/countries/__init__.py rename to data-preparation/src/source_aggregations/__init__.py diff --git a/data-preparation/src/sdp_data/transformation/co2_consumption_based_accounting.py b/data-preparation/src/source_aggregations/co2_consumption_based_accounting.py similarity index 98% rename from data-preparation/src/sdp_data/transformation/co2_consumption_based_accounting.py rename to data-preparation/src/source_aggregations/co2_consumption_based_accounting.py index 0857336..b0e074e 100644 --- a/data-preparation/src/sdp_data/transformation/co2_consumption_based_accounting.py +++ b/data-preparation/src/source_aggregations/co2_consumption_based_accounting.py @@ -1,6 +1,6 @@ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.utils.iso3166 import countries_by_alpha3 +from src.utils.iso3166 import countries_by_alpha3 +from src.utils.translation import CountryTranslatorFrenchToEnglish class EoraCo2TradePerZoneAndCountryProcessor: diff --git a/data-preparation/src/sdp_data/__init__.py b/data-preparation/src/source_aggregations/demographic/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/__init__.py rename to data-preparation/src/source_aggregations/demographic/__init__.py diff --git a/data-preparation/src/sdp_data/transformation/demographic/countries.py b/data-preparation/src/source_aggregations/demographic/countries.py similarity index 100% rename from data-preparation/src/sdp_data/transformation/demographic/countries.py rename to data-preparation/src/source_aggregations/demographic/countries.py diff --git a/data-preparation/src/sdp_data/transformation/demographic/gdp.py b/data-preparation/src/source_aggregations/demographic/gdp.py similarity index 95% rename from data-preparation/src/sdp_data/transformation/demographic/gdp.py rename to data-preparation/src/source_aggregations/demographic/gdp.py index 1403194..5cf9270 100644 --- a/data-preparation/src/sdp_data/transformation/demographic/gdp.py +++ b/data-preparation/src/source_aggregations/demographic/gdp.py @@ -1,7 +1,8 @@ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner -from src.sdp_data.utils.iso3166 import countries_by_alpha3 +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.iso3166 import countries_by_alpha3 +from src.utils.translation import CountryTranslatorFrenchToEnglish class GdpMaddissonPerZoneAndCountryProcessor: diff --git a/data-preparation/src/sdp_data/transformation/footprint_vs_territorial.py b/data-preparation/src/source_aggregations/footprint_vs_territorial.py similarity index 97% rename from data-preparation/src/sdp_data/transformation/footprint_vs_territorial.py rename to data-preparation/src/source_aggregations/footprint_vs_territorial.py index 86dc7bd..93fd0c9 100644 --- a/data-preparation/src/sdp_data/transformation/footprint_vs_territorial.py +++ b/data-preparation/src/source_aggregations/footprint_vs_territorial.py @@ -2,8 +2,9 @@ Footprint versus territorial emissions """ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.translation import CountryTranslatorFrenchToEnglish class EoraCbaPerZoneAndCountryProcessor: @@ -74,7 +75,7 @@ def run(self, df_eora_cba: pd.DataFrame, df_country: pd.DataFrame): # filter on sectors of interest list_scope_to_filter = ["Territorial Emissions", "CO2 Footprint"] df_eora_cba = df_eora_cba[df_eora_cba["scope"].isin(list_scope_to_filter)] - + # join with countries list_cols_group_by = ['group_type', 'group_name', 'year', 'scope', 'co2_unit', 'source'] dict_agg = {'co2': "sum"} diff --git a/data-preparation/src/sdp_data/test/__init__.py b/data-preparation/src/source_aggregations/ghg/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/test/__init__.py rename to data-preparation/src/source_aggregations/ghg/__init__.py diff --git a/data-preparation/src/sdp_data/transformation/ghg/cait.py b/data-preparation/src/source_aggregations/ghg/cait.py similarity index 99% rename from data-preparation/src/sdp_data/transformation/ghg/cait.py rename to data-preparation/src/source_aggregations/ghg/cait.py index 1b779ee..65d30d3 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/cait.py +++ b/data-preparation/src/source_aggregations/ghg/cait.py @@ -1,5 +1,5 @@ import pandas as pd -from sdp_data.utils.translation import CountryTranslatorFrenchToEnglish +from src.utils.translation import CountryTranslatorFrenchToEnglish class CaitProcessor: diff --git a/data-preparation/src/sdp_data/transformation/ghg/edgar.py b/data-preparation/src/source_aggregations/ghg/edgar.py similarity index 95% rename from data-preparation/src/sdp_data/transformation/ghg/edgar.py rename to data-preparation/src/source_aggregations/ghg/edgar.py index 07bf159..2b039b9 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/edgar.py +++ b/data-preparation/src/source_aggregations/ghg/edgar.py @@ -1,7 +1,8 @@ -import pandas as pd import numpy as np -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish, SectorTranslator -from src.sdp_data.utils.format import StatisticsDataframeFormatter +import pandas as pd +from src.utils.format import StatisticsDataframeFormatter +from src.utils.translation import (CountryTranslatorFrenchToEnglish, + SectorTranslator) class EdgarCleaner: diff --git a/data-preparation/src/sdp_data/transformation/ghg/fao.py b/data-preparation/src/source_aggregations/ghg/fao.py similarity index 93% rename from data-preparation/src/sdp_data/transformation/ghg/fao.py rename to data-preparation/src/source_aggregations/ghg/fao.py index d56a7f8..10e90ff 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/fao.py +++ b/data-preparation/src/source_aggregations/ghg/fao.py @@ -1,7 +1,8 @@ import pandas as pd -from sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner -from sdp_data.utils.format import StatisticsDataframeFormatter +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.format import StatisticsDataframeFormatter +from src.utils.translation import CountryTranslatorFrenchToEnglish class FaoDataProcessor: diff --git a/data-preparation/src/sdp_data/transformation/ghg/ghg.py b/data-preparation/src/source_aggregations/ghg/ghg.py similarity index 98% rename from data-preparation/src/sdp_data/transformation/ghg/ghg.py rename to data-preparation/src/source_aggregations/ghg/ghg.py index ec49c5d..6fceb0a 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/ghg.py +++ b/data-preparation/src/source_aggregations/ghg/ghg.py @@ -1,9 +1,8 @@ -import pandas as pd -from src.sdp_data.transformation.demographic.countries import ( - StatisticsPerCountriesAndZonesJoiner, -) import numpy as np -from src.sdp_data.utils.format import StatisticsDataframeFormatter +import pandas as pd +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.format import StatisticsDataframeFormatter class GhgPikEdgarCombinator: @@ -72,7 +71,7 @@ def compute_pik_edgar_energy_ratio(self, df_pik_clean, df_edgar_clean): "ghg_pik", "ghg_edgar"]) # concatenate with EDGAR and PIK transport and energy - df_edgar_transport_energy = df_edgar_clean[df_edgar_clean["sector"].isin(["Transport", "Electricity & Heat", "Other Energy"])] + df_edgar_transport_energy = df_edgar_clean[df_edgar_clean["sector"].isin(["Transport", "Electricity & Heat", "Other Energy"])] df_pik_edgar_diff_industry_transport = pd.concat([df_pik_edgar_diff_industry, df_edgar_transport_energy], axis=0) # merge on PIK energy @@ -92,7 +91,7 @@ def compute_pik_edgar_energy_ratio(self, df_pik_clean, df_edgar_clean): df_pik_edgar_ratio["ratio"] = df_pik_edgar_ratio["ghg_edgar"] / df_pik_edgar_ratio["ghg_pik"] return df_pik_edgar_ratio - + def compute_pik_edgar_extrapolated_glued(self, df_pik_clean, df_edgar_clean): # TODO - revoir complètement cette méthode. Dette technique monstrueuse... # compute the energy ratio between PIK and Edgar print("\n----- Combine PIK and EDGAR extrapolated") @@ -199,8 +198,8 @@ def run(self, df_pik_clean, df_edgar_clean, df_fao_clean, df_cait_sector_stacked df_multi_sources_sum_per_country["group_type"] = "country" df_multi_sources_sum_per_country = df_multi_sources_sum_per_country.rename(columns={"group_name": "country"}) df_ghg_multi_with_zones = pd.concat([df_multi_sources, df_multi_sources_sum_per_country, df_fao_clean], axis=0) - - # group by GAS and merge with CAIT + + # group by GAS and merge with CAIT list_group_by_gas = ["source", "group_type", "group_name", "year", "gas"] df_ghg_multi_by_gas = df_ghg_multi_with_zones.groupby(list_group_by_gas).agg(ghg=("ghg", "sum"), ghg_unit=("ghg_unit", "first")).reset_index() df_cait_gas_stacked["source"] = "CAIT" diff --git a/data-preparation/src/sdp_data/transformation/ghg/pik.py b/data-preparation/src/source_aggregations/ghg/pik.py similarity index 93% rename from data-preparation/src/sdp_data/transformation/ghg/pik.py rename to data-preparation/src/source_aggregations/ghg/pik.py index 26c3d71..576c5d7 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/pik.py +++ b/data-preparation/src/source_aggregations/ghg/pik.py @@ -1,6 +1,6 @@ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.utils.format import StatisticsDataframeFormatter +from src.utils.format import StatisticsDataframeFormatter +from src.utils.translation import CountryTranslatorFrenchToEnglish class PikCleaner: diff --git a/data-preparation/src/sdp_data/transformation/ghg/unfcc.py b/data-preparation/src/source_aggregations/ghg/unfcc.py similarity index 96% rename from data-preparation/src/sdp_data/transformation/ghg/unfcc.py rename to data-preparation/src/source_aggregations/ghg/unfcc.py index 4581b2d..20c8f86 100644 --- a/data-preparation/src/sdp_data/transformation/ghg/unfcc.py +++ b/data-preparation/src/source_aggregations/ghg/unfcc.py @@ -1,6 +1,7 @@ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish, SectorTranslator -from src.sdp_data.utils.iso3166 import countries_by_alpha3 +from src.utils.iso3166 import countries_by_alpha3 +from src.utils.translation import (CountryTranslatorFrenchToEnglish, + SectorTranslator) class UnfccProcessor: diff --git a/data-preparation/src/sdp_data/transformation/historical_co2.py b/data-preparation/src/source_aggregations/historical_co2.py similarity index 95% rename from data-preparation/src/sdp_data/transformation/historical_co2.py rename to data-preparation/src/source_aggregations/historical_co2.py index 3c86b70..a3a061c 100644 --- a/data-preparation/src/sdp_data/transformation/historical_co2.py +++ b/data-preparation/src/source_aggregations/historical_co2.py @@ -1,6 +1,7 @@ import pandas as pd -from sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.translation import CountryTranslatorFrenchToEnglish class HistoricalCo2PerZoneAndCountryProcessor: @@ -18,7 +19,7 @@ def retrieve_urss_countries(self, df_pik_cleaned): condition_urss = (df_pik_cleaned['country'].isin(self.list_urss_countries) & (df_pik_cleaned['year'] >= 1922) & (df_pik_cleaned['year'] < 1992)) df_pik_cleaned.loc[condition_urss, 'country'] = 'Russian Federation & USSR' return df_pik_cleaned - + @staticmethod def melt_years(df: pd.DataFrame): return pd.melt(df, id_vars=["type", "country", "unit"], diff --git a/data-preparation/notebooks/processed/BP_GasReserves.csv b/data-preparation/src/sources/bp/BP_GasReserves.csv similarity index 100% rename from data-preparation/notebooks/processed/BP_GasReserves.csv rename to data-preparation/src/sources/bp/BP_GasReserves.csv diff --git a/data-preparation/notebooks/processed/BP_OilReserves.csv b/data-preparation/src/sources/bp/BP_OilReserves.csv similarity index 100% rename from data-preparation/notebooks/processed/BP_OilReserves.csv rename to data-preparation/src/sources/bp/BP_OilReserves.csv diff --git a/data-preparation/notebooks/processed/FOSSIL_RESERVES_bp_fossil_with_zones_prod-Final.ipynb b/data-preparation/src/sources/bp/FOSSIL_RESERVES_bp_fossil_with_zones_prod-Final.ipynb similarity index 100% rename from data-preparation/notebooks/processed/FOSSIL_RESERVES_bp_fossil_with_zones_prod-Final.ipynb rename to data-preparation/src/sources/bp/FOSSIL_RESERVES_bp_fossil_with_zones_prod-Final.ipynb diff --git a/data-preparation/src/data_processing/FOSSIL_RESERVES_bp_fossil_with_zones_prod.py b/data-preparation/src/sources/bp/FOSSIL_RESERVES_bp_fossil_with_zones_prod.py similarity index 100% rename from data-preparation/src/data_processing/FOSSIL_RESERVES_bp_fossil_with_zones_prod.py rename to data-preparation/src/sources/bp/FOSSIL_RESERVES_bp_fossil_with_zones_prod.py diff --git a/data-preparation/notebooks/raw/BP/bp.ipynb b/data-preparation/src/sources/bp/bp.ipynb similarity index 100% rename from data-preparation/notebooks/raw/BP/bp.ipynb rename to data-preparation/src/sources/bp/bp.ipynb diff --git a/data-preparation/data/_processed/bp_oil_gas_reserves_processed.csv b/data-preparation/src/sources/bp/data/processed/bp_oil_gas_reserves_processed.csv similarity index 100% rename from data-preparation/data/_processed/bp_oil_gas_reserves_processed.csv rename to data-preparation/src/sources/bp/data/processed/bp_oil_gas_reserves_processed.csv diff --git a/data-preparation/notebooks/raw/BP/BP_GasReserves.csv b/data-preparation/src/sources/bp/data/raw/BP_GasReserves.csv similarity index 100% rename from data-preparation/notebooks/raw/BP/BP_GasReserves.csv rename to data-preparation/src/sources/bp/data/raw/BP_GasReserves.csv diff --git a/data-preparation/notebooks/raw/BP/BP_OilReserves.csv b/data-preparation/src/sources/bp/data/raw/BP_OilReserves.csv similarity index 100% rename from data-preparation/notebooks/raw/BP/BP_OilReserves.csv rename to data-preparation/src/sources/bp/data/raw/BP_OilReserves.csv diff --git a/data-preparation/src/sdp_data/data/_raw/bp/bp_file_energy_review_world.csv b/data-preparation/src/sources/bp/data/raw/bp_file_energy_review_world.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/bp/bp_file_energy_review_world.csv rename to data-preparation/src/sources/bp/data/raw/bp_file_energy_review_world.csv diff --git a/data-preparation/src/sdp_data/transformation/fossil_reserves.py b/data-preparation/src/sources/bp/fossil_reserves.py similarity index 99% rename from data-preparation/src/sdp_data/transformation/fossil_reserves.py rename to data-preparation/src/sources/bp/fossil_reserves.py index 5b92581..4b471a7 100644 --- a/data-preparation/src/sdp_data/transformation/fossil_reserves.py +++ b/data-preparation/src/sources/bp/fossil_reserves.py @@ -1,8 +1,10 @@ -import pandas as pd -import numpy as np import sys + +import numpy as np +import pandas as pd + sys.path.insert(0, r'C:\Users\HP\Desktop\shiftdataportal_data') -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish +from src.utils.translation import CountryTranslatorFrenchToEnglish class CoalReservesConsolidatedProdGenerator: diff --git a/data-preparation/notebooks/processed/process_bp-oil-gas.ipynb b/data-preparation/src/sources/bp/process_bp-oil-gas.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_bp-oil-gas.ipynb rename to data-preparation/src/sources/bp/process_bp-oil-gas.ipynb diff --git a/data-preparation/src/sdp_data/sources/raw_bp.py b/data-preparation/src/sources/bp/raw_bp.py similarity index 100% rename from data-preparation/src/sdp_data/sources/raw_bp.py rename to data-preparation/src/sources/bp/raw_bp.py diff --git a/data-preparation/src/sdp_data/test/test_utils/__init__.py b/data-preparation/src/sources/countries/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/test/test_utils/__init__.py rename to data-preparation/src/sources/countries/__init__.py diff --git a/data-preparation/src/countries/data/multiselect_groups.csv b/data-preparation/src/sources/countries/data/multiselect_groups.csv similarity index 100% rename from data-preparation/src/countries/data/multiselect_groups.csv rename to data-preparation/src/sources/countries/data/multiselect_groups.csv diff --git a/data-preparation/src/countries/main.py b/data-preparation/src/sources/countries/main.py similarity index 70% rename from data-preparation/src/countries/main.py rename to data-preparation/src/sources/countries/main.py index 9d54237..bfdd23e 100644 --- a/data-preparation/src/countries/main.py +++ b/data-preparation/src/sources/countries/main.py @@ -1,9 +1,9 @@ import pandas as pd -from src.countries.multi_selection_country_groups import \ +from src.sources.countries.multi_selection_country_groups import \ process_multi_selection_country_groups df = pd.read_csv( - "src/countries/data/multiselect_groups.csv", + "src/sources/countries/data/multiselect_groups.csv", sep=",", ) multi_selection_country_groups = process_multi_selection_country_groups(df) diff --git a/data-preparation/src/countries/multi_selection_country_groups.py b/data-preparation/src/sources/countries/multi_selection_country_groups.py similarity index 96% rename from data-preparation/src/countries/multi_selection_country_groups.py rename to data-preparation/src/sources/countries/multi_selection_country_groups.py index 73b1c0a..7f7e486 100644 --- a/data-preparation/src/countries/multi_selection_country_groups.py +++ b/data-preparation/src/sources/countries/multi_selection_country_groups.py @@ -1,5 +1,6 @@ import pandas as pd -from src.countries.new_country_group_member import add_new_members_to_group +from src.sources.countries.new_country_group_member import \ + add_new_members_to_group def process_multi_selection_country_groups(raw_multi_selection_country_groups: pd.DataFrame) -> pd.DataFrame: diff --git a/data-preparation/src/countries/multiselection_country_groups.ipynb b/data-preparation/src/sources/countries/multiselection_country_groups.ipynb similarity index 100% rename from data-preparation/src/countries/multiselection_country_groups.ipynb rename to data-preparation/src/sources/countries/multiselection_country_groups.ipynb diff --git a/data-preparation/src/countries/new_country_group_member.py b/data-preparation/src/sources/countries/new_country_group_member.py similarity index 100% rename from data-preparation/src/countries/new_country_group_member.py rename to data-preparation/src/sources/countries/new_country_group_member.py diff --git a/data-preparation/src/countries/notebook.ipynb b/data-preparation/src/sources/countries/notebook.ipynb similarity index 100% rename from data-preparation/src/countries/notebook.ipynb rename to data-preparation/src/sources/countries/notebook.ipynb diff --git a/data-preparation/data/_processed/CW_PIK_historical_emissions_processed.csv b/data-preparation/src/sources/data/processed/CW_PIK_historical_emissions_processed.csv similarity index 100% rename from data-preparation/data/_processed/CW_PIK_historical_emissions_processed.csv rename to data-preparation/src/sources/data/processed/CW_PIK_historical_emissions_processed.csv diff --git a/data-preparation/notebooks/processed/process_cw_historical_emissions.ipynb b/data-preparation/src/sources/data/processed/process_cw_historical_emissions.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_cw_historical_emissions.ipynb rename to data-preparation/src/sources/data/processed/process_cw_historical_emissions.ipynb diff --git a/data-preparation/notebooks/processed/process_cw_pik_edgar_concat.ipynb b/data-preparation/src/sources/data/processed/process_cw_pik_edgar_concat.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_cw_pik_edgar_concat.ipynb rename to data-preparation/src/sources/data/processed/process_cw_pik_edgar_concat.ipynb diff --git a/data-preparation/notebooks/raw/cw/historical_emissions.csv b/data-preparation/src/sources/data/raw/historical_emissions.csv similarity index 100% rename from data-preparation/notebooks/raw/cw/historical_emissions.csv rename to data-preparation/src/sources/data/raw/historical_emissions.csv diff --git a/data-preparation/notebooks/processed/edgar/Clean and Visualize Edgar Total CO2eq (edgar_file_em_tot_co2eq_sect).ipynb b/data-preparation/src/sources/edgar/Clean and Visualize Edgar Total CO2eq (edgar_file_em_tot_co2eq_sect).ipynb similarity index 100% rename from data-preparation/notebooks/processed/edgar/Clean and Visualize Edgar Total CO2eq (edgar_file_em_tot_co2eq_sect).ipynb rename to data-preparation/src/sources/edgar/Clean and Visualize Edgar Total CO2eq (edgar_file_em_tot_co2eq_sect).ipynb diff --git a/data-preparation/notebooks/processed/edgar/Compare_edgartotal_edgar_gases.ipynb b/data-preparation/src/sources/edgar/Compare_edgartotal_edgar_gases.ipynb similarity index 99% rename from data-preparation/notebooks/processed/edgar/Compare_edgartotal_edgar_gases.ipynb rename to data-preparation/src/sources/edgar/Compare_edgartotal_edgar_gases.ipynb index b9b30ce..96dc9a8 100644 --- a/data-preparation/notebooks/processed/edgar/Compare_edgartotal_edgar_gases.ipynb +++ b/data-preparation/src/sources/edgar/Compare_edgartotal_edgar_gases.ipynb @@ -16,8 +16,8 @@ "if module_path not in sys.path:\n", " sys.path.append(module_path)\n", " \n", - "from src.sdp_data.utils.process_data import *\n", - "from src.sdp_data.utils.info_mapping import *\n", + "from src.utils.process_data import *\n", + "from src.utils.info_mapping import *\n", "\n", "pd.options.plotting.backend = 'holoviews'\n", "import re" diff --git a/data-preparation/data/_processed/concat_edgar_pik_ghg_emissions.csv b/data-preparation/src/sources/edgar/data/processed/concat_edgar_pik_ghg_emissions.csv similarity index 100% rename from data-preparation/data/_processed/concat_edgar_pik_ghg_emissions.csv rename to data-preparation/src/sources/edgar/data/processed/concat_edgar_pik_ghg_emissions.csv diff --git a/data-preparation/data/_processed/edgar_processed_ch4_n2o_co2_fgases.csv b/data-preparation/src/sources/edgar/data/processed/edgar_processed_ch4_n2o_co2_fgases.csv similarity index 100% rename from data-preparation/data/_processed/edgar_processed_ch4_n2o_co2_fgases.csv rename to data-preparation/src/sources/edgar/data/processed/edgar_processed_ch4_n2o_co2_fgases.csv diff --git a/data-preparation/notebooks/raw/edgar/edgar.ipynb b/data-preparation/src/sources/edgar/edgar.ipynb similarity index 100% rename from data-preparation/notebooks/raw/edgar/edgar.ipynb rename to data-preparation/src/sources/edgar/edgar.ipynb diff --git a/data-preparation/notebooks/processed/process_edgar_raw_CH4_N2O_CO2_Fgases.ipynb b/data-preparation/src/sources/edgar/process_edgar_raw_CH4_N2O_CO2_Fgases.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_edgar_raw_CH4_N2O_CO2_Fgases.ipynb rename to data-preparation/src/sources/edgar/process_edgar_raw_CH4_N2O_CO2_Fgases.ipynb diff --git a/data-preparation/src/sdp_data/sources/doc/eia/APIv2.1.0.pdf b/data-preparation/src/sources/eia/APIv2.1.0.pdf similarity index 100% rename from data-preparation/src/sdp_data/sources/doc/eia/APIv2.1.0.pdf rename to data-preparation/src/sources/eia/APIv2.1.0.pdf diff --git a/data-preparation/notebooks/fossil_import_export_project/Fossil_Import_Export_new.ipynb b/data-preparation/src/sources/eia/Fossil_Import_Export_new.ipynb similarity index 99% rename from data-preparation/notebooks/fossil_import_export_project/Fossil_Import_Export_new.ipynb rename to data-preparation/src/sources/eia/Fossil_Import_Export_new.ipynb index 8ca7665..ec8d76a 100644 --- a/data-preparation/notebooks/fossil_import_export_project/Fossil_Import_Export_new.ipynb +++ b/data-preparation/src/sources/eia/Fossil_Import_Export_new.ipynb @@ -20,7 +20,7 @@ "import matplotlib.pyplot as plt\n", "import sys\n", "sys.path.insert(0, r'C:\\Users\\HP\\Desktop\\shiftdataportal_data')\n", - "from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish" + "from src.utils.translation import CountryTranslatorFrenchToEnglish" ] }, { diff --git a/data-preparation/notebooks/processed/ab_eia_international.ipynb b/data-preparation/src/sources/eia/ab_eia_international.ipynb similarity index 100% rename from data-preparation/notebooks/processed/ab_eia_international.ipynb rename to data-preparation/src/sources/eia/ab_eia_international.ipynb diff --git a/data-preparation/notebooks/raw/ab_eia_raw_product_name_id.ipynb b/data-preparation/src/sources/eia/ab_eia_raw_product_name_id.ipynb similarity index 100% rename from data-preparation/notebooks/raw/ab_eia_raw_product_name_id.ipynb rename to data-preparation/src/sources/eia/ab_eia_raw_product_name_id.ipynb diff --git a/data-preparation/notebooks/processed/ab_process_eia.ipynb b/data-preparation/src/sources/eia/ab_process_eia.ipynb similarity index 100% rename from data-preparation/notebooks/processed/ab_process_eia.ipynb rename to data-preparation/src/sources/eia/ab_process_eia.ipynb diff --git a/data-preparation/data/_processed/eia_product_name_id.csv b/data-preparation/src/sources/eia/data/processed/eia_product_name_id.csv similarity index 100% rename from data-preparation/data/_processed/eia_product_name_id.csv rename to data-preparation/src/sources/eia/data/processed/eia_product_name_id.csv diff --git a/data-preparation/src/sdp_data/data/_raw/eia/eia_api_ieo_world.csv b/data-preparation/src/sources/eia/data/raw/eia_api_ieo_world.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/eia/eia_api_ieo_world.csv rename to data-preparation/src/sources/eia/data/raw/eia_api_ieo_world.csv diff --git a/data-preparation/src/sdp_data/data/_raw/eia/eia_api_intl_region.csv b/data-preparation/src/sources/eia/data/raw/eia_api_intl_region.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/eia/eia_api_intl_region.csv rename to data-preparation/src/sources/eia/data/raw/eia_api_intl_region.csv diff --git a/data-preparation/src/sdp_data/transformation/eia.py b/data-preparation/src/sources/eia/eia.py similarity index 98% rename from data-preparation/src/sdp_data/transformation/eia.py rename to data-preparation/src/sources/eia/eia.py index 8f67912..357705f 100644 --- a/data-preparation/src/sdp_data/transformation/eia.py +++ b/data-preparation/src/sources/eia/eia.py @@ -1,12 +1,14 @@ -from src.sdp_data.utils.iso3166 import countries -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner -from src.sdp_data.utils.format import StatisticsDataframeFormatter -import requests import json -import pandas as pd import os +import pandas as pd +import requests +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.format import StatisticsDataframeFormatter +from src.utils.iso3166 import countries +from src.utils.translation import CountryTranslatorFrenchToEnglish + class EiaScrapper: # TODO - à refactorer @@ -311,7 +313,7 @@ def compute_nuclear_share_in_electricity(self): df_nuclear_electricity_share = StatisticsDataframeFormatter().select_and_sort_values( df_nuclear_electricity_share, "nuclear_share_of_electricity_generation", round_statistics=4) return df_nuclear_electricity_share - + def compute_electricity_by_energy_family(self): print("-- compute electricity per energy family for each country") assert self.df_electricity_by_energy_family is not None diff --git a/data-preparation/src/sdp_data/transformation/fossil_import_export.py b/data-preparation/src/sources/eia/fossil_import_export.py similarity index 98% rename from data-preparation/src/sdp_data/transformation/fossil_import_export.py rename to data-preparation/src/sources/eia/fossil_import_export.py index a64339e..9984cee 100644 --- a/data-preparation/src/sdp_data/transformation/fossil_import_export.py +++ b/data-preparation/src/sources/eia/fossil_import_export.py @@ -1,10 +1,11 @@ -import pandas as pd -import numpy as np import sys +import numpy as np +import pandas as pd + sys.path.insert(0, r'C:\Users\HP\Desktop\shiftdataportal_data') -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.utils.utils import diff_evaluation +from src.utils.translation import CountryTranslatorFrenchToEnglish +from src.utils.utils import diff_evaluation # Class to update recent data diff --git a/data-preparation/src/sdp_data/sources/raw_eia.py b/data-preparation/src/sources/eia/raw_eia.py similarity index 100% rename from data-preparation/src/sdp_data/sources/raw_eia.py rename to data-preparation/src/sources/eia/raw_eia.py diff --git a/data-preparation/data/_processed/ember/ember_electricity_demand.csv b/data-preparation/src/sources/ember/data/processed/ember_electricity_demand.csv similarity index 100% rename from data-preparation/data/_processed/ember/ember_electricity_demand.csv rename to data-preparation/src/sources/ember/data/processed/ember_electricity_demand.csv diff --git a/data-preparation/data/_processed/ember/ember_electricity_generation_capacity.csv b/data-preparation/src/sources/ember/data/processed/ember_electricity_generation_capacity.csv similarity index 100% rename from data-preparation/data/_processed/ember/ember_electricity_generation_capacity.csv rename to data-preparation/src/sources/ember/data/processed/ember_electricity_generation_capacity.csv diff --git a/data-preparation/data/_processed/ember/ember_electricity_imports.csv b/data-preparation/src/sources/ember/data/processed/ember_electricity_imports.csv similarity index 100% rename from data-preparation/data/_processed/ember/ember_electricity_imports.csv rename to data-preparation/src/sources/ember/data/processed/ember_electricity_imports.csv diff --git a/data-preparation/data/_processed/ember/ember_power_secor_emmissions.csv b/data-preparation/src/sources/ember/data/processed/ember_power_secor_emmissions.csv similarity index 100% rename from data-preparation/data/_processed/ember/ember_power_secor_emmissions.csv rename to data-preparation/src/sources/ember/data/processed/ember_power_secor_emmissions.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_all_month.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_all_month.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_all_month.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_all_month.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_all_year.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_all_year.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_all_year.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_all_year.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_europe_month.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_review_europe_month.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_europe_month.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_review_europe_month.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_europe_year.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_review_europe_year.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_europe_year.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_review_europe_year.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_global_month.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_review_global_month.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_global_month.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_review_global_month.csv diff --git a/data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_global_year.csv b/data-preparation/src/sources/ember/data/raw/ember_file_elec_review_global_year.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/ember/ember_file_elec_review_global_year.csv rename to data-preparation/src/sources/ember/data/raw/ember_file_elec_review_global_year.csv diff --git a/data-preparation/notebooks/processed/ember/ember.ipynb b/data-preparation/src/sources/ember/ember.ipynb similarity index 100% rename from data-preparation/notebooks/processed/ember/ember.ipynb rename to data-preparation/src/sources/ember/ember.ipynb diff --git a/data-preparation/src/sdp_data/sources/raw_ember.py b/data-preparation/src/sources/ember/raw_ember.py similarity index 100% rename from data-preparation/src/sdp_data/sources/raw_ember.py rename to data-preparation/src/sources/ember/raw_ember.py diff --git a/data-preparation/notebooks/raw/population/20230501_population_gapminder.ipynb b/data-preparation/src/sources/gapminder/20230501_population_gapminder.ipynb similarity index 100% rename from data-preparation/notebooks/raw/population/20230501_population_gapminder.ipynb rename to data-preparation/src/sources/gapminder/20230501_population_gapminder.ipynb diff --git a/data-preparation/notebooks/processed/population_gapminder.csv b/data-preparation/src/sources/gapminder/data/processed/population_gapminder.csv similarity index 100% rename from data-preparation/notebooks/processed/population_gapminder.csv rename to data-preparation/src/sources/gapminder/data/processed/population_gapminder.csv diff --git a/data-preparation/src/sdp_data/transformation/demographic/population.py b/data-preparation/src/sources/gapminder/population.py similarity index 96% rename from data-preparation/src/sdp_data/transformation/demographic/population.py rename to data-preparation/src/sources/gapminder/population.py index 44c06a3..14f3bed 100644 --- a/data-preparation/src/sdp_data/transformation/demographic/population.py +++ b/data-preparation/src/sources/gapminder/population.py @@ -1,11 +1,13 @@ import pandas as pd -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from src.sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.translation import CountryTranslatorFrenchToEnglish + # TODO - à revoir """ -> Revue des valeurs manquantes "zone supprimées" pour PopulationCleaner. -> ajouter des tests unitaires. --> mutuliser le code entre GapMinderCleaner et +-> mutuliser le code entre GapMinderCleaner et """ diff --git a/data-preparation/data/_processed/iea/iea_final_energy.csv b/data-preparation/src/sources/iea/data/processed/iea_final_energy.csv similarity index 100% rename from data-preparation/data/_processed/iea/iea_final_energy.csv rename to data-preparation/src/sources/iea/data/processed/iea_final_energy.csv diff --git a/data-preparation/src/sdp_data/data/_raw/iea/iea_api_eei.csv b/data-preparation/src/sources/iea/data/raw/iea_api_eei.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/iea/iea_api_eei.csv rename to data-preparation/src/sources/iea/data/raw/iea_api_eei.csv diff --git a/data-preparation/notebooks/raw/iea/iea_weo.csv b/data-preparation/src/sources/iea/data/raw/iea_weo.csv similarity index 100% rename from data-preparation/notebooks/raw/iea/iea_weo.csv rename to data-preparation/src/sources/iea/data/raw/iea_weo.csv diff --git a/data-preparation/notebooks/processed/iea/iea.ipynb b/data-preparation/src/sources/iea/iea.ipynb similarity index 100% rename from data-preparation/notebooks/processed/iea/iea.ipynb rename to data-preparation/src/sources/iea/iea.ipynb diff --git a/data-preparation/src/sdp_data/transformation/iea.py b/data-preparation/src/sources/iea/iea.py similarity index 98% rename from data-preparation/src/sdp_data/transformation/iea.py rename to data-preparation/src/sources/iea/iea.py index d043dd6..dd0cf5d 100644 --- a/data-preparation/src/sdp_data/transformation/iea.py +++ b/data-preparation/src/sources/iea/iea.py @@ -1,12 +1,14 @@ -from sdp_data.utils.iso3166 import countries -from sdp_data.utils.translation import CountryTranslatorFrenchToEnglish -from sdp_data.transformation.demographic.countries import StatisticsPerCountriesAndZonesJoiner -from sdp_data.utils.format import StatisticsDataframeFormatter -import requests import json -import pandas as pd import os +import pandas as pd +import requests +from src.source_aggregations.demographic.countries import \ + StatisticsPerCountriesAndZonesJoiner +from src.utils.format import StatisticsDataframeFormatter +from src.utils.iso3166 import countries +from src.utils.translation import CountryTranslatorFrenchToEnglish + class EiaScrapper: # TODO - à refactorer diff --git a/data-preparation/notebooks/processed/iea/iea_process_final_energy.ipynb b/data-preparation/src/sources/iea/iea_process_final_energy.ipynb similarity index 100% rename from data-preparation/notebooks/processed/iea/iea_process_final_energy.ipynb rename to data-preparation/src/sources/iea/iea_process_final_energy.ipynb diff --git a/data-preparation/src/sdp_data/sources/raw_iea.py b/data-preparation/src/sources/iea/raw_iea.py similarity index 100% rename from data-preparation/src/sdp_data/sources/raw_iea.py rename to data-preparation/src/sources/iea/raw_iea.py diff --git a/data-preparation/notebooks/raw/PROCESS_RAW.ipynb b/data-preparation/src/sources/info/PROCESS_RAW.ipynb similarity index 100% rename from data-preparation/notebooks/raw/PROCESS_RAW.ipynb rename to data-preparation/src/sources/info/PROCESS_RAW.ipynb diff --git a/data-preparation/data/_info/__INFO_M49_REGION.csv b/data-preparation/src/sources/info/data/__INFO_M49_REGION.csv similarity index 100% rename from data-preparation/data/_info/__INFO_M49_REGION.csv rename to data-preparation/src/sources/info/data/__INFO_M49_REGION.csv diff --git a/data-preparation/data/_info/__INFO_UN_ISIC.csv b/data-preparation/src/sources/info/data/__INFO_UN_ISIC.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_ISIC.csv rename to data-preparation/src/sources/info/data/__INFO_UN_ISIC.csv diff --git a/data-preparation/data/_info/__INFO_UN_ISIC_en.csv b/data-preparation/src/sources/info/data/__INFO_UN_ISIC_en.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_ISIC_en.csv rename to data-preparation/src/sources/info/data/__INFO_UN_ISIC_en.csv diff --git a/data-preparation/data/_info/__INFO_UN_ISIC_es.csv b/data-preparation/src/sources/info/data/__INFO_UN_ISIC_es.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_ISIC_es.csv rename to data-preparation/src/sources/info/data/__INFO_UN_ISIC_es.csv diff --git a/data-preparation/data/_info/__INFO_UN_ISIC_fr.csv b/data-preparation/src/sources/info/data/__INFO_UN_ISIC_fr.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_ISIC_fr.csv rename to data-preparation/src/sources/info/data/__INFO_UN_ISIC_fr.csv diff --git a/data-preparation/data/_info/__INFO_UN_M49_en.csv b/data-preparation/src/sources/info/data/__INFO_UN_M49_en.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_M49_en.csv rename to data-preparation/src/sources/info/data/__INFO_UN_M49_en.csv diff --git a/data-preparation/data/_info/__INFO_UN_M49_es.csv b/data-preparation/src/sources/info/data/__INFO_UN_M49_es.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_M49_es.csv rename to data-preparation/src/sources/info/data/__INFO_UN_M49_es.csv diff --git a/data-preparation/data/_info/__INFO_UN_M49_fr.csv b/data-preparation/src/sources/info/data/__INFO_UN_M49_fr.csv similarity index 100% rename from data-preparation/data/_info/__INFO_UN_M49_fr.csv rename to data-preparation/src/sources/info/data/__INFO_UN_M49_fr.csv diff --git a/data-preparation/notebooks/processed/owid_catalog.csv b/data-preparation/src/sources/owid/data/processed/owid_catalog.csv similarity index 100% rename from data-preparation/notebooks/processed/owid_catalog.csv rename to data-preparation/src/sources/owid/data/processed/owid_catalog.csv diff --git a/data-preparation/notebooks/raw/owid/files/owid_kaya-identity-co2.csv b/data-preparation/src/sources/owid/data/raw/owid_kaya-identity-co2.csv similarity index 100% rename from data-preparation/notebooks/raw/owid/files/owid_kaya-identity-co2.csv rename to data-preparation/src/sources/owid/data/raw/owid_kaya-identity-co2.csv diff --git a/data-preparation/notebooks/raw/owid/owid_catalog.ipynb b/data-preparation/src/sources/owid/owid_catalog.ipynb similarity index 100% rename from data-preparation/notebooks/raw/owid/owid_catalog.ipynb rename to data-preparation/src/sources/owid/owid_catalog.ipynb diff --git a/data-preparation/notebooks/raw/owid/owid_kaya.ipynb b/data-preparation/src/sources/owid/owid_kaya.ipynb similarity index 100% rename from data-preparation/notebooks/raw/owid/owid_kaya.ipynb rename to data-preparation/src/sources/owid/owid_kaya.ipynb diff --git a/data-preparation/notebooks/processed/gdp_worldbank.csv b/data-preparation/src/sources/worldbank/data/processed/gdp_worldbank.csv similarity index 100% rename from data-preparation/notebooks/processed/gdp_worldbank.csv rename to data-preparation/src/sources/worldbank/data/processed/gdp_worldbank.csv diff --git a/data-preparation/notebooks/processed/population_worldbank.csv b/data-preparation/src/sources/worldbank/data/processed/population_worldbank.csv similarity index 100% rename from data-preparation/notebooks/processed/population_worldbank.csv rename to data-preparation/src/sources/worldbank/data/processed/population_worldbank.csv diff --git a/data-preparation/data/_processed/processed_gdp_worldbank.csv b/data-preparation/src/sources/worldbank/data/processed/processed_gdp_worldbank.csv similarity index 100% rename from data-preparation/data/_processed/processed_gdp_worldbank.csv rename to data-preparation/src/sources/worldbank/data/processed/processed_gdp_worldbank.csv diff --git a/data-preparation/data/_processed/processed_population_worldbank.csv b/data-preparation/src/sources/worldbank/data/processed/processed_population_worldbank.csv similarity index 100% rename from data-preparation/data/_processed/processed_population_worldbank.csv rename to data-preparation/src/sources/worldbank/data/processed/processed_population_worldbank.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_countries.csv b/data-preparation/src/sources/worldbank/data/raw/__info_wb_api_countries.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_countries.csv rename to data-preparation/src/sources/worldbank/data/raw/__info_wb_api_countries.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_indicators.csv b/data-preparation/src/sources/worldbank/data/raw/__info_wb_api_indicators.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_indicators.csv rename to data-preparation/src/sources/worldbank/data/raw/__info_wb_api_indicators.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_regions.csv b/data-preparation/src/sources/worldbank/data/raw/__info_wb_api_regions.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_regions.csv rename to data-preparation/src/sources/worldbank/data/raw/__info_wb_api_regions.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_sources.csv b/data-preparation/src/sources/worldbank/data/raw/__info_wb_api_sources.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_sources.csv rename to data-preparation/src/sources/worldbank/data/raw/__info_wb_api_sources.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_topics.csv b/data-preparation/src/sources/worldbank/data/raw/__info_wb_api_topics.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/__info_wb_api_topics.csv rename to data-preparation/src/sources/worldbank/data/raw/__info_wb_api_topics.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_gdp.csv b/data-preparation/src/sources/worldbank/data/raw/wb_api_all_gdp.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_gdp.csv rename to data-preparation/src/sources/worldbank/data/raw/wb_api_all_gdp.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_ghge_sect.csv b/data-preparation/src/sources/worldbank/data/raw/wb_api_all_ghge_sect.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_ghge_sect.csv rename to data-preparation/src/sources/worldbank/data/raw/wb_api_all_ghge_sect.csv diff --git a/data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_pop.csv b/data-preparation/src/sources/worldbank/data/raw/wb_api_all_pop.csv similarity index 100% rename from data-preparation/src/sdp_data/data/_raw/wb/wb_api_all_pop.csv rename to data-preparation/src/sources/worldbank/data/raw/wb_api_all_pop.csv diff --git a/data-preparation/notebooks/processed/process_WB_population.ipynb b/data-preparation/src/sources/worldbank/process_WB_population.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_WB_population.ipynb rename to data-preparation/src/sources/worldbank/process_WB_population.ipynb diff --git a/data-preparation/notebooks/processed/process_wb_gdp.ipynb b/data-preparation/src/sources/worldbank/process_wb_gdp.ipynb similarity index 100% rename from data-preparation/notebooks/processed/process_wb_gdp.ipynb rename to data-preparation/src/sources/worldbank/process_wb_gdp.ipynb diff --git a/data-preparation/src/sdp_data/sources/raw_wb.py b/data-preparation/src/sources/worldbank/raw_wb.py similarity index 100% rename from data-preparation/src/sdp_data/sources/raw_wb.py rename to data-preparation/src/sources/worldbank/raw_wb.py diff --git a/data-preparation/src/sdp_data/transformation/demographic/worldbank_scrap.py b/data-preparation/src/sources/worldbank/worldbank_scrap.py similarity index 100% rename from data-preparation/src/sdp_data/transformation/demographic/worldbank_scrap.py rename to data-preparation/src/sources/worldbank/worldbank_scrap.py diff --git a/data-preparation/src/sdp_data/transformation/__init__.py b/data-preparation/src/test/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/transformation/__init__.py rename to data-preparation/src/test/__init__.py diff --git a/data-preparation/src/sdp_data/transformation/demographic/__init__.py b/data-preparation/src/test/test_utils/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/transformation/demographic/__init__.py rename to data-preparation/src/test/test_utils/__init__.py diff --git a/data-preparation/src/sdp_data/test/test_utils/test_translation.py b/data-preparation/src/test/test_utils/test_translation.py similarity index 94% rename from data-preparation/src/sdp_data/test/test_utils/test_translation.py rename to data-preparation/src/test/test_utils/test_translation.py index 4ec48a3..ebc0e1c 100644 --- a/data-preparation/src/sdp_data/test/test_utils/test_translation.py +++ b/data-preparation/src/test/test_utils/test_translation.py @@ -1,7 +1,8 @@ -import pandas as pd import unittest -from sdp_data.utils.translation import CountryTranslatorFrenchToEnglish + import numpy as np +import pandas as pd +from src.utils.translation import CountryTranslatorFrenchToEnglish class TestCountryTranslatorFrenchToEnglish(unittest.TestCase): diff --git a/data-preparation/src/sdp_data/transformation/ghg/__init__.py b/data-preparation/src/utils/__init__.py similarity index 100% rename from data-preparation/src/sdp_data/transformation/ghg/__init__.py rename to data-preparation/src/utils/__init__.py diff --git a/data-preparation/src/sdp_data/utils/christophe.py b/data-preparation/src/utils/christophe.py similarity index 100% rename from data-preparation/src/sdp_data/utils/christophe.py rename to data-preparation/src/utils/christophe.py diff --git a/data-preparation/src/sdp_data/utils/compare.py b/data-preparation/src/utils/compare.py similarity index 100% rename from data-preparation/src/sdp_data/utils/compare.py rename to data-preparation/src/utils/compare.py diff --git a/data-preparation/src/sdp_data/utils/download.py b/data-preparation/src/utils/download.py similarity index 100% rename from data-preparation/src/sdp_data/utils/download.py rename to data-preparation/src/utils/download.py diff --git a/data-preparation/src/sdp_data/utils/format.py b/data-preparation/src/utils/format.py similarity index 100% rename from data-preparation/src/sdp_data/utils/format.py rename to data-preparation/src/utils/format.py diff --git a/data-preparation/src/sdp_data/utils/iso3166.py b/data-preparation/src/utils/iso3166.py similarity index 100% rename from data-preparation/src/sdp_data/utils/iso3166.py rename to data-preparation/src/utils/iso3166.py diff --git a/data-preparation/src/sdp_data/utils/translation.py b/data-preparation/src/utils/translation.py similarity index 100% rename from data-preparation/src/sdp_data/utils/translation.py rename to data-preparation/src/utils/translation.py diff --git a/data-preparation/src/sdp_data/utils/utils.py b/data-preparation/src/utils/utils.py similarity index 98% rename from data-preparation/src/sdp_data/utils/utils.py rename to data-preparation/src/utils/utils.py index e088adf..743beb7 100644 --- a/data-preparation/src/sdp_data/utils/utils.py +++ b/data-preparation/src/utils/utils.py @@ -1,4 +1,4 @@ -from src.sdp_data.utils.translation import CountryTranslatorFrenchToEnglish +from src.utils.translation import CountryTranslatorFrenchToEnglish def subtract(series1, series2): diff --git a/data-preparation/src/sdp_data/utils/zip.py b/data-preparation/src/utils/zip.py similarity index 100% rename from data-preparation/src/sdp_data/utils/zip.py rename to data-preparation/src/utils/zip.py