diff --git a/README.md b/README.md
index 9b99762..72c063f 100644
--- a/README.md
+++ b/README.md
@@ -18,12 +18,14 @@ The remainder of the charts in the response can be produced from code in the rep
   - Install packages listed in `requirements.txt`
 - Activate conda environment: `conda activate asf_welsh_energy_consultation`
 - Run `make inputs-pull` to pull the zipped static data from S3 and put it in `/inputs`
-
 - Run `python asf_welsh_energy_consultation/analysis/produce_plots.py --local_data_dir <YOUR_LOCAL_DIR>`. You need to specify the path to the local
   directory where your local copy of the EPC data is/will be saved by replacing `<YOUR_LOCAL_DIR>` with the path to your "ASF_data" directory or equivalent.
-  If you don't have a local directory for ASF core data, you can create a folder called "ASF_data" in your home directory. You can specify which
-  batch of EPC data to download and MCS data to load from S3 by passing the `--epc_batch` and `--mcs_batch` arguments, both
-  default to downloading/loading the newest data from S3, respectively. Run `python asf_welsh_energy_consultation/analysis/produce_plots.py -h` for more info.
+  If you don't have a local directory for ASF core data, you can create a folder called "ASF_data" in your home directory.
+
+- You can specify which batch of EPC data to download and which batch of MCS data to load from S3 by passing the `--epc_batch` and `--mcs_batch` arguments
+  respectively; both default to the newest data on S3. You can also specify which set of supplementary data should be used by passing
+  the `--supp_data` argument followed by the name of the directory, e.g. `data_202310`. See the `Historical analyses` section below to find which version was used for each analysis.
+  Run `python asf_welsh_energy_consultation/analysis/produce_plots.py -h` for more info.
 
 The script should generate the following six plots which will be saved in your local repo in `outputs/figures`:
 
@@ -34,6 +36,16 @@ The script should generate the following six plots which will be saved in your l
 - `new_build_hp_cumulative.html`
 - `new_build_hp_proportion.html`
 
+It should generate a further 10 plots, five in English and five in Welsh, saved in `outputs/figures/english` and `outputs/figures/welsh`, respectively:
+
+- `age_prop[_welsh].png`
+- `epc_all[_welsh].html`
+- `epc_hp_private_retrofit[_welsh].html`
+- `epc_hp_private[_welsh].html`
+- `hp_tenure[_welsh].html`
+
+An additional figure, `hp_map.html`, should be saved in `outputs/figures/english`.
+
 ## Skeleton folder structure
 
 ```
@@ -57,8 +69,18 @@ outputs/
 
 Versions/batches of data used for previous analysis are listed below.
 
+October 2023 analysis:
+
+- Supplementary data: data_202310
+- EPC: 2023_Q2_complete (preprocessed)
+- mcs_installations_231009.csv
+- mcs_installations_epc_full_231009.csv
+- off-gas-live-postcodes-2022.xlsx - check [here](https://www.xoserve.com/a-to-z/) for updates
+- rurality.ods - 2011 Rural Urban Classification for small area geographies, see [here](https://www.ons.gov.uk/methodology/geography/geographicalproducts/ruralurbanclassifications)
+
 April 2023 analysis:
 
+- Supplementary data: data_202304
 - EPC: 2022_Q4_complete (preprocessed)
 - mcs_installations_230315.csv
 - mcs_installations_epc_full_230315.csv
diff --git a/asf_welsh_energy_consultation/__init__.py b/asf_welsh_energy_consultation/__init__.py
index 811053a..1f5e26a 100644
--- a/asf_welsh_energy_consultation/__init__.py
+++ b/asf_welsh_energy_consultation/__init__.py
@@ -32,4 +32,4 @@ def get_yaml_config(file_path: Path) -> Optional[dict]:
 
 # base/global config
 _base_config_path = Path(__file__).parent.resolve() / "config/base.yaml"
-config = get_yaml_config(_base_config_path)
+config_file = get_yaml_config(_base_config_path)
diff --git a/asf_welsh_energy_consultation/analysis/produce_plots.py b/asf_welsh_energy_consultation/analysis/produce_plots.py
index f2843d3..20e35e4 100644
--- a/asf_welsh_energy_consultation/analysis/produce_plots.py
+++ b/asf_welsh_energy_consultation/analysis/produce_plots.py
@@ -4,79 +4,43 @@
 """
 
 import altair as alt
+import os
 
+from asf_welsh_energy_consultation import config_file
 from asf_welsh_energy_consultation.getters.get_data import get_electric_tenure
-from asf_welsh_energy_consultation.pipeline.process_data import *
-from asf_welsh_energy_consultation.utils.formatting import format_number
+from asf_welsh_energy_consultation.getters.get_data import (
+    load_wales_df,
+    load_wales_hp,
+    pc_to_coords_df,
+)
+from asf_welsh_energy_consultation.pipeline import process_data
+from asf_core_data.getters.data_getters import logger
+from asf_welsh_energy_consultation.pipeline.plotting import (
+    proportions_bar_chart,
+    age_prop_chart,
+    time_series_comparison,
+    plot_kepler_graph,
+)
 
 from nesta_ds_utils.viz.altair.formatting import setup_theme
 
 alt.data_transformers.disable_max_rows()
 setup_theme()
 
-
 output_folder = "outputs/figures/"
+time_series_min = config_file["plots"]["time_series_min_default"]
 
 if not os.path.isdir(output_folder):
     os.makedirs(output_folder)
 
 
-def time_series_comparison(
-    data,
-    title,
-    y_var,
-    y_title,
-    color_var,
-    x_var="date:T",
-    x_title="Date",
-    domain_min="2015-01-01",
-    domain_max="2023-01-01",
-    width=600,
-    height=300,
-):
-    """Generic function for plotting a line chart by category (represented by color_var).
-
-    Args:
-        data (pd.DataFrame): Base data. Needs to be structured as a column of consecutive dates,
-            a column indicating categories and a column with cumulative values.
-        title (str/list): Chart title.
-        y_var (str): y variable.
-        y_title (str): y axis title.
-        color_var (str): Variable to split by.
-        x_var (str, optional): x variable. Defaults to "date:T".
-        x_title (str, optional): x axis title. Defaults to "Date".
-        domain_min (str, optional): x axis minimum. Defaults to "2015-01-01".
-        domain_max (str, optional): x axis maximum. Defaults to "2023-01-01".
-        width (int, optional): Chart width. Defaults to 600.
-        height (int, optional): Chart height. Defaults to 300.
-
-    Returns:
-        alt.Chart: Base altair chart.
-    """
-    chart = (
-        alt.Chart(
-            data,
-            title=title,
-        )
-        .mark_line()
-        .encode(
-            x=alt.X(
-                x_var, title=x_title, scale=alt.Scale(domain=[domain_min, domain_max])
-            ),
-            y=alt.Y(y_var, title=y_title),
-            color=color_var,
-        )
-        .properties(width=width, height=height)
-    )
-
-    return chart
-
-
 if __name__ == "__main__":
     # ======================================================
     # MCS installations, by off-gas status
 
-    installations_by_gas_status = cumsums_by_variable("off_gas", "Gas status")
+    installations_by_gas_status = process_data.cumsums_by_variable(
+        "off_gas", "Gas status"
+    )
 
     installations_by_gas_status_chart = time_series_comparison(
         data=installations_by_gas_status,
@@ -96,7 +60,9 @@ def time_series_comparison(
     # ======================================================
     # MCS installations, by rurality
 
-    installations_by_rurality = cumsums_by_variable("rurality_2_label", "Rurality")
+    installations_by_rurality = process_data.cumsums_by_variable(
+        "rurality_2_label", "Rurality"
+    )
 
     installations_by_rurality_chart = time_series_comparison(
         data=installations_by_rurality,
@@ -107,6 +73,7 @@ def time_series_comparison(
         y_var="Number of heat pumps:Q",
         y_title="Number of heat pump installations",
         color_var="Rurality:N",
+        domain_max=installations_by_rurality.date.max(),
     )
 
     installations_by_rurality_chart.save(
@@ -116,7 +83,7 @@ def time_series_comparison(
     # ======================================================
     # Proportions of new builds that have heat pumps
 
-    new_build_hp_proportion = get_new_hp_counts()
+    new_build_hp_proportion = process_data.get_new_hp_counts()
 
     new_build_hp_proportion_chart = (
         alt.Chart(
@@ -129,7 +96,7 @@ def time_series_comparison(
                 # domain ensures good margin at left/right of chart
                 "year",
                 title="Year",
-                scale=alt.Scale(domain=["2007-07-01", "2022-06-01"]),
+                scale=alt.Scale(domain=["2007-07-01", "2023-01-01"]),
             ),
             y=alt.Y("sum(value)", title="Number of EPCs"),
             # want heat pumps to be at the bottom of each bar - hacky but works
@@ -144,7 +111,7 @@ def time_series_comparison(
     # ======================================================
     # Cumulative number of new builds with heat pumps
 
-    new_build_hp_cumulative = get_new_hp_cumsums()
+    new_build_hp_cumulative = process_data.get_new_hp_cumsums()
 
     new_build_hp_cumulative_chart = (
         alt.Chart(
@@ -167,8 +134,8 @@ def time_series_comparison(
     # ======================================================
     # Cumulative MCS retrofits
 
-    ret = get_mcs_retrofits()
-    ret_cumsums = cumsums_by_variable("country", "wales_col", data=ret)
+    ret = process_data.get_mcs_retrofits()
+    ret_cumsums = process_data.cumsums_by_variable("country", "wales_col", data=ret)
     # this function works without separating by category - 'wales_col' is a whole column of "Wales" (not used)
 
     cumulative_retrofits_chart = (
@@ -181,7 +148,7 @@ def time_series_comparison(
             x=alt.X(
                 "date",
                 title="Date",
-                scale=alt.Scale(domain=["2015-01-01", "2023-01-01"]),
+                scale=alt.Scale(domain=[time_series_min, ret_cumsums.date.max()]),
             ),
             y="Number of heat pumps",
         )
@@ -200,7 +167,7 @@ def time_series_comparison(
         alt.Chart(
             electric_tenure,
             title="Fig. 2: Properties in Wales with only electric heating, split by tenure (N = "
-            + format_number(N)
+            + "{:,}".format(N)
             + ")",
         )
         .mark_bar()
@@ -213,3 +180,208 @@ def time_series_comparison(
     ).configure_title(fontSize=20)
 
     electric_tenure_chart.save(output_folder + "electric_tenure.html")
+
+    # ======================================================
+    # Original plots and stats
+
+    wales_df = load_wales_df(from_csv=False)
+    wales_hp = load_wales_hp(wales_df)
+
+    # English plots
+
+    # Key statistics
+    print("Number of heat pumps:", len(wales_hp))
+    print("Number of properties in EPC:", len(wales_df))
+    print(
+        "Estimated percentage of properties with a heat pump:",
+        "{:.2%}".format(len(wales_hp) / len(wales_df)),
+    )
+    print(wales_hp.TENURE.value_counts(normalize=True))
+
+    epc_c_or_above_and_good_walls = wales_df.loc[
+        wales_df["CURRENT_ENERGY_RATING"].isin(["A", "B", "C"])
+        & wales_df["WALLS_ENERGY_EFF"].isin(["Good", "Very Good"])
+    ]
+
+    epc_c_or_above_and_good_walls_and_roof = epc_c_or_above_and_good_walls.loc[
+        epc_c_or_above_and_good_walls["ROOF_ENERGY_EFF"].isin(["Good", "Very Good"])
+    ]
+
+    print(
+        "Number of EPC C+ properties with good or very good wall insulation:",
+        len(epc_c_or_above_and_good_walls),
+    )
+    print(
+        "As a proportion of properties in EPC:",
+        len(epc_c_or_above_and_good_walls) / len(wales_df),
+    )
+
+    print(
+        "\nNumber of EPC C+ properties with good or very good wall and roof insulation:",
+        len(epc_c_or_above_and_good_walls_and_roof),
+    )
+    print(
+        "As a proportion of properties in EPC:",
+        len(epc_c_or_above_and_good_walls_and_roof) / len(wales_df),
+    )
+
+    # Tenure of Welsh HPs
+    proportions_bar_chart(
+        wales_hp,
+        "TENURE",
+        "Fig. 3: Tenure of Welsh properties with heat pumps",
+        "Tenure",
+        "Percentage of properties",
+        filename="hp_tenure",
+        x_type="tenure",
+        expand_y=True,
+    )
+
+    # EPC, all
+    unknown_vals = len(wales_df.loc[wales_df.CURRENT_ENERGY_RATING == "unknown"])
+    if unknown_vals > 0:
+        logger.warning(
+            f"{unknown_vals} properties with unknown EPC ratings. These records will be removed from the count."
+        )
+    proportions_bar_chart(
+        # only one unknown EPC property so fine to just remove it
+        wales_df.loc[wales_df.CURRENT_ENERGY_RATING != "unknown"],
+        "CURRENT_ENERGY_RATING",
+        "Fig. 5: EPC ratings of all Welsh properties",
+        "Energy efficiency rating",
+        "Percentage of properties",
+        filename="epc_all",
+        x_type="other",
+    )
+
+    # EPC, private sector with HPs
+    proportions_bar_chart(
+        wales_hp.loc[wales_hp.TENURE.isin(["Owner-occupied", "Privately rented"])],
+        "CURRENT_ENERGY_RATING",
+        [
+            "Fig. 6: EPC ratings of owner-occupied and privately rented",
+            "Welsh properties with heat pumps",
+        ],
+        "Energy efficiency rating",
+        "Percentage of properties",
+        filename="epc_hp_private",
+        x_type="other",
+    )
+
+    # EPCs, private sector with retrofitted HPs
+    proportions_bar_chart(
+        wales_hp.loc[
+            wales_hp.TENURE.isin(["Owner-occupied", "Privately rented"])
+            & (wales_hp.CONSTRUCTION_AGE_BAND != "2007 onwards")
+        ],
+        "CURRENT_ENERGY_RATING",
+        [
+            "Fig. 7: EPC ratings of owner-occupied and privately rented",
+            "Welsh properties with heat pumps, built pre-2007",
+        ],
+        "Energy efficiency rating",
+        "Percentage of properties",
+        filename="epc_hp_private_retrofit",
+        x_type="other",
+    )
+
+    age_data = process_data.generate_age_data(wales_df)
+    age_prop_chart(
+        age_data, "Fig. 9: Construction age bands and energy efficiencies", "age_prop"
+    )
+
+    ## Welsh plots
+
+    welsh_replacements = {
+        "TENURE": {
+            "Owner-occupied": "Perchen-feddiannaeth",
+            "Socially rented": "Rhentu cymdeithasol",
+            "Privately rented": "Rhentu preifat",
+            "Unknown": "Anhysbys",
+        },
+        "CONSTRUCTION_AGE_BAND": {
+            "England and Wales: before 1900": "Cyn 1900",
+            "Pre-1900": "Cyn 1900",
+            "2007 onwards": "2007 ymlaen",
+            "unknown": "Anhysbys",
+        },
+    }
+
+    for df in [wales_df, wales_hp, age_data]:
+        for col in ["TENURE", "CONSTRUCTION_AGE_BAND"]:
+            if col in df.columns:
+                df[col] = df[col].replace(welsh_replacements[col])
+
+    # Tenure of Welsh HPs
+    proportions_bar_chart(
+        wales_hp,
+        "TENURE",
+        "Ffig. 4: Deiliadaeth eiddo â phympiau gwres yng Nghymru",
+        "Deiliadaeth",
+        "Canran yr eiddo",
+        filename="hp_tenure_welsh",
+        x_type="tenure",
+        expand_y=True,
+        language="welsh",
+    )
+
+    # EPC, all
+    proportions_bar_chart(
+        wales_df.loc[wales_df.CURRENT_ENERGY_RATING != "unknown"],
+        "CURRENT_ENERGY_RATING",
+        "Ffig. 6: Sgoriau EPC holl eiddo Cymru",
+        "Sgôr effeithlonrwydd ynni",
+        "Canran yr eiddo",
+        filename="epc_all_welsh",
+        x_type="other",
+        language="welsh",
+    )
+
+    # EPC, private sector with HPs
+    proportions_bar_chart(
+        wales_hp.loc[wales_hp.TENURE.isin(["Perchen-feddiannaeth", "Rhentu preifat"])],
+        "CURRENT_ENERGY_RATING",
+        [
+            "Ffig. 7: Sgoriau EPC eiddo perchen-feddiannaeth a",
+            "rhentu preifat Cymru sydd â phympiau gwres",
+        ],
+        "Sgôr effeithlonrwydd ynni",
+        "Canran yr eiddo",
+        filename="epc_hp_private_welsh",
+        x_type="other",
+        language="welsh",
+    )
+
+    # EPCs, private sector with retrofitted HPs (filters use the Welsh labels substituted above)
+    proportions_bar_chart(
+        wales_hp.loc[
+            wales_hp.TENURE.isin(["Perchen-feddiannaeth", "Rhentu preifat"])
+            & (wales_hp.CONSTRUCTION_AGE_BAND != "2007 ymlaen")
+        ],
+        "CURRENT_ENERGY_RATING",
+        [
+            "Ffig. 8: Sgoriau EPC eiddo perchen-feddiannaeth a rhentu preifat",
+            "Cymru sydd â phympiau gwres, a adeiladwyd cyn 2007",
+        ],
+        "Sgôr effeithlonrwydd ynni",
+        "Canran yr eiddo",
+        filename="epc_hp_private_retrofit_welsh",
+        x_type="other",
+        language="welsh",
+    )
+
+    # Ages and EPC ratings
+    age_prop_chart(
+        age_data,
+        "Ffig. 9: Bandiau oedran adeiladu ac effeithlonrwydd ynni",
+        "age_prop_welsh",
+        language="welsh",
+    )
+
+    # Map of Welsh HPs
+    wales_df = load_wales_df(from_csv=False)
+    pc_df = pc_to_coords_df()
+
+    hp_hex_counts = process_data.generate_hex_counts(wales_df, pc_df)
+
+    plot_kepler_graph(hp_hex_counts, "hp_map")
diff --git a/asf_welsh_energy_consultation/config/base.yaml b/asf_welsh_energy_consultation/config/base.yaml
index 855fb1c..1dff292 100644
--- a/asf_welsh_energy_consultation/config/base.yaml
+++ b/asf_welsh_energy_consultation/config/base.yaml
@@ -1,3 +1,5 @@
 epc_data_config:
   epc_processing_version: "preprocessed"
   download_core_data_epc_version: "epc_preprocessed"
+plots:
+  time_series_min_default: "2015-01-01"
diff --git a/asf_welsh_energy_consultation/config/translation_config.py b/asf_welsh_energy_consultation/config/translation_config.py
new file mode 100644
index 0000000..d2d25ae
--- /dev/null
+++ b/asf_welsh_energy_consultation/config/translation_config.py
@@ -0,0 +1,26 @@
+quality_list = {
+    "english": ["Very Poor", "Poor", "Average", "Good", "Very Good"],
+    "welsh": ["Gwael Iawn", "Gwael", "Cymedrig", "Da", "Da Iawn"],
+}
+
+tenure_list = {
+    "english": ["Owner-occupied", "Socially rented", "Privately rented", "Unknown"],
+    "welsh": [
+        "Perchen-feddiannaeth",
+        "Rhentu cymdeithasol",
+        "Rhentu preifat",
+        "Anhysbys",
+    ],
+}
+
+energy_efficiency_text = {
+    "english": "Mean energy efficiency: ",
+    "welsh": "Effeithlonrwydd ynni cymedrig: ",
+}
+
+housing_stock_text = {
+    "english": "Percentage of Welsh housing stock",
+    "welsh": "Canran stoc tai Cymru",
+}
+
+age_band_text = {"english": "Age band", "welsh": "Band oedran"}
diff --git a/asf_welsh_energy_consultation/getters/get_data.py b/asf_welsh_energy_consultation/getters/get_data.py
index a7b77d4..f35f900 100644
--- a/asf_welsh_energy_consultation/getters/get_data.py
+++ b/asf_welsh_energy_consultation/getters/get_data.py
@@ -4,15 +4,16 @@
 """
 
 from asf_welsh_energy_consultation import PROJECT_DIR
-from asf_welsh_energy_consultation import config
+from asf_welsh_energy_consultation import config_file
 
 from asf_core_data import load_preprocessed_epc_data, get_mcs_installations
 from asf_core_data.getters.mcs_getters.get_mcs_installations import (
     get_processed_installations_data_by_batch,
 )
+
 from asf_core_data.getters.epc.data_batches import get_batch_path
 from asf_core_data.config import base_config
-from asf_core_data.getters.data_getters import download_core_data, logger
+from asf_core_data.getters.data_getters import download_core_data, logger, load_data
 
 import pandas as pd
 import numpy as np
@@ -20,18 +21,6 @@
 
 from argparse import ArgumentParser
 
-epc_processing_version = config["epc_data_config"]["epc_processing_version"]
-download_core_data_epc_version = config["epc_data_config"][
-    "download_core_data_epc_version"
-]
-
-postcode_path = "inputs/data/postcodes"
-regions_path = "inputs/data/regions.csv"
-off_gas_path = "inputs/data/off-gas-live-postcodes-2022.xlsx"
-oa_path = "inputs/data/postcode_to_output_area.csv"
-rurality_path = "inputs/data/rurality.ods"
-tenure_path = "inputs/data/tenure.csv"
-
 
 def create_argparser():
     """
@@ -51,6 +40,13 @@ def create_argparser():
         type=str,
     )
 
+    parser.add_argument(
+        "--supp_data",
+        help="Name of directory where supplementary data is stored",
+        default="newest",
+        type=str,
+    )
+
     parser.add_argument(
         "--epc_batch",
         help='Specifies which EPC data batch to use in the form `YYYY_[Quarter]_complete`. Defaults to "newest"',
@@ -78,10 +74,28 @@ def get_args():
     """
     parser = create_argparser()
 
-    return parser.parse_args()
+    args = parser.parse_args()
+
+    if args.supp_data == "newest":
+        subdirs = [subdir for subdir in os.listdir("inputs")]
+        args.supp_data = max(subdirs)
+
+    return args
 
 
 arguments = get_args()
+
+input_data_path = f"inputs/{arguments.supp_data}/"
+
+wales_epc_path = "wales_epc.csv"
+
+postcode_path = f"inputs/{arguments.supp_data}/postcodes"
+regions_path = f"inputs/{arguments.supp_data}/regions.csv"
+off_gas_path = f"inputs/{arguments.supp_data}/off-gas-live-postcodes-2022.xlsx"
+oa_path = f"inputs/{arguments.supp_data}/postcode_to_output_area.csv"
+rurality_path = f"inputs/{arguments.supp_data}/rurality.ods"
+tenure_path = f"inputs/{arguments.supp_data}/tenure.csv"
+
 LOCAL_DATA_DIR = arguments.local_data_dir
 
 
@@ -232,10 +246,9 @@ def get_rurality():
     return oa_rural
 
 
-def check_local_epc():
+def check_local_epc(epc_processing_version=None, download_core_data_epc_version=None):
     """
     Checks local directory for relevant EPC batch and downloads relevant EPC batch from S3 to local directory if not found.
-
     """
     epc_batch = arguments.epc_batch
 
@@ -269,20 +282,23 @@ def check_local_epc():
         )
 
 
-def get_wales_epc():
+def get_wales_processed_epc():
     """Get Welsh EPC data (processed but not deduplicated).
 
     Returns:
         pd.DataFrame: Welsh preprocessed EPC data.
     """
-    check_local_epc()
+    check_local_epc(
+        epc_processing_version="preprocessed",
+        download_core_data_epc_version="epc_preprocessed",
+    )
 
     epc_batch = arguments.epc_batch
 
     wales_epc = load_preprocessed_epc_data(
         data_path=LOCAL_DATA_DIR,
         usecols=None,
-        version=epc_processing_version,
+        version="preprocessed",
         subset="Wales",
         batch=epc_batch,
     )
@@ -351,3 +367,97 @@ def get_electric_tenure():
     )
 
     return data
+
+
+def load_wales_df(from_csv=True):
+    """Load preprocessed and deduplicated EPC dataset for Wales.
+    If data is loaded from all-GB file, the filtered version is saved to csv
+    (`input_data_path + wales_epc_path`), which is also where `from_csv` reads from.
+
+    Args:
+        from_csv (bool, optional): Whether to load from saved CSV. Defaults to True.
+
+    Returns:
+        pd.DataFrame: EPC data.
+    """
+    if from_csv:
+        wales_epc = pd.read_csv(input_data_path + wales_epc_path)
+    else:
+        check_local_epc(
+            epc_processing_version="preprocessed_and_deduplicated",
+            download_core_data_epc_version="epc_preprocessed_dedupl",
+        )
+        batch = arguments.epc_batch
+        wales_epc = load_preprocessed_epc_data(
+            data_path=LOCAL_DATA_DIR,
+            subset="Wales",
+            batch=batch,
+            version="preprocessed_dedupl",
+            usecols=[
+                "LMK_KEY",
+                "INSPECTION_DATE",
+                "UPRN",
+                "POSTCODE",
+                "CURRENT_ENERGY_EFFICIENCY",
+                "CURRENT_ENERGY_RATING",
+                "WALLS_ENERGY_EFF",
+                "FLOOR_ENERGY_EFF",
+                "ROOF_ENERGY_EFF",
+                "CONSTRUCTION_AGE_BAND",
+                "TENURE",
+                "TRANSACTION_TYPE",
+                "HP_INSTALLED",
+            ],
+        )
+
+        wales_epc.TENURE = wales_epc.TENURE.replace(
+            {
+                "owner-occupied": "Owner-occupied",
+                "rental (social)": "Socially rented",
+                "rental (private)": "Privately rented",
+                "unknown": "Unknown",
+            }
+        )
+        # if CONSTRUCTION_AGE_BAND is unknown and TRANSACTION_TYPE is new dwelling,
+        # assume construction age is >2007 because EPCs started in 2008
+        new_build_unknown_age = (wales_epc.CONSTRUCTION_AGE_BAND == "unknown") & (
+            wales_epc.TRANSACTION_TYPE == "new dwelling"
+        )
+        wales_epc.loc[new_build_unknown_age, "CONSTRUCTION_AGE_BAND"] = "2007 onwards"
+
+        # cache next to the other supplementary inputs; the index is dropped so the
+        # CSV reloads with the same columns as the frame returned here
+        os.makedirs(input_data_path, exist_ok=True)
+        wales_epc.to_csv(input_data_path + wales_epc_path, index=False)
+
+    return wales_epc
+
+
+def load_wales_hp(wales_epc):
+    """Filter Welsh EPC records down to those flagged as having a heat pump.
+
+    Args:
+        wales_epc (pd.DataFrame): Wales EPC data with an HP_INSTALLED mask column.
+
+    Returns:
+        pd.DataFrame: Rows selected by HP_INSTALLED, re-indexed from zero.
+    """
+    has_heat_pump = wales_epc["HP_INSTALLED"]
+
+    return wales_epc.loc[has_heat_pump].reset_index(drop=True)
+
+
+def pc_to_coords_df():
+    """Load the postcode-to-coordinates lookup (data_path "S3") with upper-cased column names.
+
+    Returns:
+        Result of `load_data` (presumably a pd.DataFrame) with renamed columns.
+    """
+    upper_case_names = {
+        name: name.upper() for name in ("postcode", "latitude", "longitude")
+    }
+    coords = load_data(
+        data_path="S3",
+        file_path="inputs/supplementary_data/geospatial/ukpostcodes_to_coordindates.csv",
+    )
+    return coords.rename(columns=upper_case_names)
diff --git a/asf_welsh_energy_consultation/pipeline/plotting.py b/asf_welsh_energy_consultation/pipeline/plotting.py
new file mode 100644
index 0000000..4b79be1
--- /dev/null
+++ b/asf_welsh_energy_consultation/pipeline/plotting.py
@@ -0,0 +1,295 @@
+# File: asf_welsh_energy_consultation/pipeline/plotting.py
+"""
+Defines plotting functions.
+"""
+
+import pandas as pd
+import altair as alt
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mtick
+import os
+from keplergl import KeplerGl
+
+from asf_welsh_energy_consultation.config import translation_config
+from asf_welsh_energy_consultation import config_file
+from asf_core_data.getters.data_getters import logger
+from nesta_ds_utils.viz.altair.formatting import setup_theme
+
+from asf_welsh_energy_consultation.utils.utils import arial
+
+alt.themes.register("arial", arial)
+alt.themes.enable("arial")
+
+plt.rc("font", family="Arial")
+
+fig_output_path = {
+    "english": "outputs/figures/english/",
+    "welsh": "outputs/figures/welsh/",
+}
+
+for file_path in fig_output_path.values():
+    if not os.path.isdir(file_path):
+        os.makedirs(file_path)
+
+setup_theme()
+
+
+def time_series_comparison(
+    data,
+    title,
+    y_var,
+    y_title,
+    color_var,
+    x_var="date:T",
+    x_title="Date",
+    domain_min=None,
+    domain_max=None,
+    width=600,
+    height=300,
+):
+    """Plot a line chart over time with one line per category of `color_var`.
+
+    Args:
+        data (pd.DataFrame): Data to plot. When `domain_max` is not given it must
+            have a `date` column, whose maximum is used as the upper x limit.
+        title (str/list): Chart title.
+        y_var (str): Encoding for the y axis.
+        y_title (str): Title of the y axis.
+        color_var (str): Encoding used to colour (and so split) the lines.
+        x_var (str, optional): Encoding for the x axis. Defaults to "date:T".
+        x_title (str, optional): Title of the x axis. Defaults to "Date".
+        domain_min (str, optional): Lower x limit. Defaults to None, in which case
+            the `plots.time_series_min_default` config value is used.
+        domain_max (str, optional): Upper x limit. Defaults to None, in which case
+            the latest value of `data.date` is used.
+        width (int, optional): Chart width. Defaults to 600.
+        height (int, optional): Chart height. Defaults to 300.
+
+    Returns:
+        alt.Chart: The configured line chart (not saved).
+    """
+    # Fall back to the configured minimum / latest date when limits are not given
+    if domain_min is None:
+        domain_min = config_file["plots"]["time_series_min_default"]
+        logger.info(f"Time series comparison using {domain_min} as min date")
+    if domain_max is None:
+        domain_max = data.date.max()
+        logger.info(f"Time series comparison using {domain_max} as max date")
+
+    x_encoding = alt.X(
+        x_var,
+        title=x_title,
+        scale=alt.Scale(domain=[domain_min, domain_max]),
+    )
+    y_encoding = alt.Y(y_var, title=y_title)
+
+    line_chart = (
+        alt.Chart(data, title=title)
+        .mark_line()
+        .encode(x=x_encoding, y=y_encoding, color=color_var)
+    )
+    return line_chart.properties(width=width, height=height)
+
+
+def proportions_bar_chart(
+    base_data,
+    field,
+    title,
+    x_label,
+    y_label,
+    filename,
+    expand_y=False,
+    x_type="good",
+    language="english",
+):
+    """Save a bar chart of the proportions of properties in each category of `field`.
+
+    The chart is written to `fig_output_path[language] + filename + ".html"`.
+
+    Args:
+        base_data (pd.DataFrame): EPC data.
+        field (str): Column whose value counts are plotted.
+        title (str/list): Chart title; " (N = <row count>)" is appended to it.
+        x_label (str): x axis label.
+        y_label (str): y axis label.
+        filename (str): Output filename, without extension.
+        expand_y (bool, optional): If True, fix the y axis to 0-50%. Defaults to False.
+        x_type (str, optional): "good", "tenure", or other (A-G ratings); sets bar order. Defaults to "good".
+        language (str, optional): "english" or "welsh". Defaults to "english".
+    """
+    # Name the category column "index" explicitly: pandas >= 2 would name it after `field`
+    counts = base_data[field].value_counts().rename_axis("index")
+    source = counts.reset_index(name="count")
+
+    if x_type == "good":
+        order = translation_config.quality_list[language]
+    elif x_type == "tenure":
+        order = translation_config.tenure_list[language]
+    else:
+        order = ["A", "B", "C", "D", "E", "F", "G"]
+
+    # add N to title (append to a string, or to the last line of a list of strings)
+    n_suffix = " (N = " + "{:,}".format(len(base_data)) + ")"
+    if isinstance(title, str):
+        full_title = title + n_suffix
+    else:
+        full_title = title[:-1] + [title[-1] + n_suffix]
+
+    chart = (
+        alt.Chart(source)
+        .transform_joinaggregate(
+            Total="sum(count)",
+        )
+        .transform_calculate(PercentOfTotal="datum.count / datum.Total")
+        .mark_bar()
+        .encode(
+            x=alt.X("index", sort=order, title=x_label, axis=alt.Axis(labelAngle=0)),
+            y=alt.Y(
+                shorthand="PercentOfTotal:Q",
+                axis=alt.Axis(format=".0%"),
+                title=y_label,
+                scale=alt.Scale(domain=[0, 0.5]) if expand_y is True else alt.Scale(),
+            ),
+        )
+        .properties(width=500, height=300, title=full_title)
+    ).configure_title(fontSize=20)
+
+    chart.save(fig_output_path[language] + filename + ".html")
+
+    print("Saved: " + filename + ".html")
+
+
+# matplotlib only cycles through 10 colours by default, so define 11 (black + those 10) to cover all age categories
+colors = [
+    "#000000",
+    "#1f77b4",
+    "#ff7f0e",
+    "#2ca02c",
+    "#d62728",
+    "#9467bd",
+    "#8c564b",
+    "#e377c2",
+    "#7f7f7f",
+    "#bcbd22",
+    "#17becf",
+]
+
+
+def age_prop_chart(base_data, title, filename, language="english"):
+    """Create single-column bar chart with property ages, proportions and average energy efficiencies.
+
+    One stacked segment is drawn per row of `base_data` and the figure is saved to
+    `fig_output_path[language] + filename + ".png"`.
+
+    Args:
+        base_data (pd.DataFrame): One row per age band, in stacking order, with columns
+            CONSTRUCTION_AGE_BAND, percentage, cumul_prop and CURRENT_ENERGY_EFFICIENCY.
+        title (str): Chart title.
+        filename (str): Filename, without extension.
+        language (str, optional): Language of chart text. Defaults to "english".
+    """
+
+    text_labels = [
+        translation_config.energy_efficiency_text[language] + str(val)
+        for val in base_data["CURRENT_ENERGY_EFFICIENCY"]
+    ]
+    prop_labels = [str(round(val, 1)) + "%" for val in base_data["percentage"]]
+    width = 1
+
+    fig, ax = plt.subplots()
+    fig.set_figheight(10)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.spines["bottom"].set_visible(False)
+
+    # stack one bar per row: each starts where the previous row's cumulative share ends
+    # (driven by the data rather than len(colors), so no row is dropped or missing)
+    bottoms = [0] + list(base_data["cumul_prop"])[:-1]
+    bars = zip(base_data["percentage"], bottoms, base_data["CONSTRUCTION_AGE_BAND"])
+    for i, (bar_height, bottom, age_band) in enumerate(bars):
+        ax.bar(
+            x=" ",
+            height=bar_height,
+            width=width,
+            bottom=bottom,
+            label=age_band,
+            color=colors[i % len(colors)],  # reuse colours if there are more rows
+        )
+
+    # format y axis
+    ax.set_ylim(0, 100)
+    ax.yaxis.set_major_formatter(mtick.PercentFormatter(100))
+    ax.set_ylabel(
+        translation_config.housing_stock_text[language], fontweight="bold", fontsize=12
+    )
+    ax.set_title(title, fontweight="bold", fontsize=14, pad=20)
+
+    # put legend in top right
+    box = ax.get_position()
+    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
+
+    handles, labels = ax.get_legend_handles_labels()
+    ax.legend(
+        reversed(handles),
+        reversed(labels),
+        loc="upper right",
+        bbox_to_anchor=(1.6, 1),
+        fontsize=10,
+        title=translation_config.age_band_text[language],
+        title_fontproperties={"weight": "bold"},
+    )
+
+    # put text in centres of bars
+    rects = ax.patches
+
+    for rect, label in zip(rects, text_labels):
+        height = rect.get_height()
+        ax.text(
+            rect.get_x() + rect.get_width() / 2,
+            rect.get_y() + height / 2,
+            label,
+            ha="center",
+            va="center",
+            color="white",
+            fontsize=12,
+        )
+
+    for rect, label in zip(rects, prop_labels):
+        height = rect.get_height()
+        ax.text(
+            rect.get_x() + rect.get_width() + 0.01,
+            rect.get_y() + height / 2,
+            label,
+            ha="left",
+            va="center",
+            fontsize=12,
+        )
+
+    # format axes
+    plt.tick_params(
+        axis="x",  # changes apply to the x-axis
+        which="both",  # both major and minor ticks are affected
+        bottom=False,  # ticks along the bottom edge are off
+        top=False,  # ticks along the top edge are off
+        labelbottom=False,
+    )  # labels along the bottom edge are off
+
+    plt.tight_layout()
+
+    plt.savefig(fig_output_path[language] + filename + ".png", bbox_inches="tight")
+    # close the figure so repeated calls do not accumulate open figures
+    plt.close(fig)
+
+    print("Saved: " + filename + ".png")
+
+
+def plot_kepler_graph(base_data, filename):
+    """Save a Kepler map of the `perc_true` and `hex_id` columns to the English figures folder."""
+    kepler_map = KeplerGl(height=500)
+    layer_data = base_data[["perc_true", "hex_id"]]
+    kepler_map.add_data(data=layer_data, name="Heat pump proportions")
+
+    html_path = os.path.join(fig_output_path["english"], f"{filename}.html")
+    kepler_map.save_to_html(file_name=html_path)
+
+    print("Saved: " + filename + ".html")
diff --git a/asf_welsh_energy_consultation/pipeline/process_data.py b/asf_welsh_energy_consultation/pipeline/process_data.py
index d434c29..b7d2e2f 100644
--- a/asf_welsh_energy_consultation/pipeline/process_data.py
+++ b/asf_welsh_energy_consultation/pipeline/process_data.py
@@ -5,7 +5,9 @@
 
 import pandas as pd
 
-from asf_welsh_energy_consultation.getters.get_data import *
+from asf_core_data.utils.geospatial.data_agglomeration import add_hex_id
+from asf_welsh_energy_consultation.getters import get_data
+from asf_core_data.getters.data_getters import logger
 
 
 # PROCESSING MCS
@@ -17,10 +19,10 @@ def get_enhanced_mcs():
     Returns:
         pd.DataFrame: Dataset as described above.
     """
-    mcs = get_mcs_domestic()
-    og = get_offgas()
-    countries = get_countries()
-    rural = get_rurality()
+    mcs = get_data.get_mcs_domestic()
+    og = get_data.get_offgas()
+    countries = get_data.get_countries()
+    rural = get_data.get_rurality()
 
     # join with off-gas data
     mcs = mcs.merge(og, on="postcode", how="left")
@@ -28,15 +30,23 @@ def get_enhanced_mcs():
 
     # join with regions in order to filter to Wales
     mcs = mcs.merge(countries, on="postcode", how="left")
+    if mcs.country.isna().sum() > 0:
+        logger.warning(
+            f"{mcs.country.isna().sum()} MCS installation records have no country match. "
+            f"Potential loss of data when filtering for Wales."
+        )
     mcs = mcs.loc[mcs["country"] == "Wales"].reset_index(drop=True)
-    # 1203 records with no match - 273 are Northern Ireland which leaves 918
+    # There will be records with no match
     # Some will be new postcodes (new build developments)
     # and some may be expired postcodes
     # In future, implement new solution that uses outward codes
 
     # join with rurality data
     mcs = mcs.merge(rural, on="postcode", how="left")
-    # only 13 postcodes lost in this merge
+    if mcs.rurality_10_code.isna().sum() > 0:
+        logger.warning(
+            f"Loss of data: {mcs.rurality_10_code.isna().sum()} Welsh MCS installation records have no rurality code match."
+        )
 
     # add custom rurality column (rurality "type 7": all different types of urban mapped to Urban)
     mcs["rurality_7"] = mcs["rurality_10_label"].replace(
@@ -102,7 +112,7 @@ def cumsums_by_variable(variable, new_var_name, data=enhanced_mcs):
 
 # PROCESSING EPC
 
-wales_epc = get_wales_epc()
+wales_epc = get_data.get_wales_processed_epc()
 
 
 def get_wales_epc_new():
@@ -134,8 +144,13 @@ def get_new_hp_counts():
         pd.DataFrame: New build HP counts.
     """
     wales_epc_new = get_wales_epc_new()
-    # 2023 not yet complete so drop any post-2022 data
-    wales_epc_new = wales_epc_new.loc[wales_epc_new["INSPECTION_DATE"] < "2023-01-01"]
+    # Requires full year of data so remove most recent year if it doesn't have 12 months of data
+    max_date = wales_epc_new["INSPECTION_DATE"].max()
+    max_year = max_date.year
+    if max_date != pd.to_datetime(f"{max_year}-12-31"):
+        wales_epc_new = wales_epc_new.loc[
+            wales_epc_new["INSPECTION_DATE"] < f"{max_year}-01-01"
+        ]
 
     new_hp_counts = (
         wales_epc_new.groupby(["year", "HP_INSTALLED"])
@@ -196,10 +211,15 @@ def mcs_epc_first_records():
     Returns:
         pd.DataFrame: MCS records joined with first EPC.
     """
-    mcs_epc = get_mcs_epc_domestic()
-    regions = get_countries()
+    mcs_epc = get_data.get_mcs_epc_domestic()
+    regions = get_data.get_countries()
 
     mcs_epc = mcs_epc.merge(regions, on="postcode", how="left")
+    if mcs_epc.country.isna().sum() > 0:
+        logger.warning(
+            f"{mcs_epc.country.isna().sum()} joined MCS-EPC records have no country match. "
+            f"Potential loss of data when filtering for Wales."
+        )
     mcs_epc = mcs_epc.loc[mcs_epc["country"] == "Wales"].reset_index(drop=True)
 
     first_records = (
@@ -256,3 +276,92 @@ def get_mcs_retrofits():
     mcs_retrofits = enhanced_mcs.loc[~enhanced_mcs.index.isin(hp_when_built_indices)]
 
     return mcs_retrofits
+
+
+def generate_hex_counts(wales_df, pc_df):
+    """
+    Merge EPC records with postcode coordinates on "POSTCODE" and compute the percentage of
+    properties with a heat pump in each H3 hexagon (https://h3geo.org/docs/) in Wales.
+
+    Args:
+        wales_df (pandas.DataFrame): processed EPC data for Wales with "POSTCODE" and
+            boolean "HP_INSTALLED" columns
+        pc_df (pandas.DataFrame): Welsh postcodes ("POSTCODE" column) and corresponding
+            lat/lon coordinates
+
+    Returns:
+        pandas.DataFrame: one row per "hex_id" with the False/True property counts, their
+        "total" and "perc_true" (% of properties with a heat pump)
+    """
+
+    # inner merge: EPC records whose postcode is missing from pc_df are dropped
+    wales_df_coords = pd.merge(wales_df, pc_df, on=["POSTCODE"])
+    wales_df_hex = add_hex_id(wales_df_coords, 6)  # add H3 hex id to each row
+    hp_hex_counts = (
+        wales_df_hex.groupby(["hex_id", "HP_INSTALLED"]).size().unstack(fill_value=0)
+    )  # counts of properties with/without a heat pump in each hex
+    # guarantee both columns exist even if the data holds only one HP_INSTALLED value
+    hp_hex_counts = hp_hex_counts.reindex(columns=[False, True], fill_value=0)
+    hp_hex_counts["total"] = hp_hex_counts[True] + hp_hex_counts[False]
+    hp_hex_counts["perc_true"] = hp_hex_counts[True] / hp_hex_counts["total"] * 100
+
+    return hp_hex_counts.reset_index()
+
+
+def generate_age_data(wales_df):
+    """Generate table of proportion of properties in each age band.
+    Also includes average energy efficiency for each age band.
+
+    "unknown" age bands are excluded and the pre-1900 band is relabelled "Pre-1900".
+
+    Args:
+        wales_df (pd.DataFrame): EPC data with "CONSTRUCTION_AGE_BAND" and
+            "CURRENT_ENERGY_EFFICIENCY" columns.
+
+    Returns:
+        pd.DataFrame: Age bands in chronological order with "percentage", mean
+        "CURRENT_ENERGY_EFFICIENCY" (1 d.p.) and cumulative "cumul_prop".
+
+    Raises:
+        KeyError: If any of the expected age bands is absent from the data.
+    """
+    known_age = wales_df.loc[wales_df.CONSTRUCTION_AGE_BAND != "unknown"]
+    # Name both columns explicitly: the labels reset_index() gives value_counts()
+    # output differ between pandas < 2.0 and >= 2.0 ("index" vs the series name).
+    age_props = (
+        (known_age.CONSTRUCTION_AGE_BAND.value_counts(normalize=True) * 100)
+        .rename_axis("CONSTRUCTION_AGE_BAND")
+        .reset_index(name="percentage")
+    )
+    ages_efficiencies = (
+        wales_df.groupby("CONSTRUCTION_AGE_BAND")["CURRENT_ENERGY_EFFICIENCY"]
+        .mean()
+        .reset_index()
+    )
+    age_data = age_props.merge(ages_efficiencies, on="CONSTRUCTION_AGE_BAND")
+    age_data["CONSTRUCTION_AGE_BAND"] = age_data["CONSTRUCTION_AGE_BAND"].replace(
+        {"England and Wales: before 1900": "Pre-1900"}
+    )
+    # put rows in chronological order (value_counts sorts by frequency)
+    age_band_order = [
+        "Pre-1900",
+        "1900-1929",
+        "1930-1949",
+        "1950-1966",
+        "1965-1975",
+        "1976-1983",
+        "1983-1991",
+        "1991-1998",
+        "1996-2002",
+        "2003-2007",
+        "2007 onwards",
+    ]
+    age_data = (
+        age_data.set_index("CONSTRUCTION_AGE_BAND").loc[age_band_order].reset_index()
+    )
+    age_data["CURRENT_ENERGY_EFFICIENCY"] = age_data["CURRENT_ENERGY_EFFICIENCY"].round(
+        1
+    )
+    age_data["cumul_prop"] = age_data["percentage"].cumsum()
+
+    return age_data
diff --git a/asf_welsh_energy_consultation/utils/formatting.py b/asf_welsh_energy_consultation/utils/formatting.py
deleted file mode 100644
index 93494f9..0000000
--- a/asf_welsh_energy_consultation/utils/formatting.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# File: asf_welsh_energy_consultation/utils/formatting.py
-"""
-Formatting utility functions.
-"""
-
-import re
-
-
-def format_number(n):
-    """
-    If number is 5 or more digits, add a comma every 3 digits from the right.
-
-    Args:
-        n (int): Number to format.
-
-    Returns:
-        str: Formatted number.
-    """
-    if n > 9999:
-        return re.sub(r"(\d)(?=(\d{3})+(?!\d))", r"\1,", str(n))
-    else:
-        return str(n)
diff --git a/asf_welsh_energy_consultation/utils/utils.py b/asf_welsh_energy_consultation/utils/utils.py
new file mode 100644
index 0000000..be4443d
--- /dev/null
+++ b/asf_welsh_energy_consultation/utils/utils.py
@@ -0,0 +1,9 @@
+def arial():
+    """Return a theme-style configuration dictionary that uses the Arial font.
+
+    Returns:
+        dict: {"config": {...}} with font settings under "title" and "axis".
+    """
+    typeface = "Arial"
+    axis_fonts = dict.fromkeys(("labelFont", "titleFont"), typeface)
+    return {"config": {"title": {"font": typeface}, "axis": axis_fonts}}
diff --git a/requirements.txt b/requirements.txt
index 28a0ff2..7eb8214 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ matplotlib
 odfpy
 selenium==4.2.0
 argparse==1.4.0
+keplergl
 s3fs>=2023.3.0
 asf_core_data@ git+ssh://git@github.com/nestauk/asf_core_data.git
 nesta_ds_utils@ git+ssh://git@github.com/nestauk/nesta_ds_utils.git