diff --git a/app/algo.py b/app/algo.py
index 6f9354c..6e4df32 100644
--- a/app/algo.py
+++ b/app/algo.py
@@ -1,3 +1,8 @@
+"""
+This module contains functions to compute and/or display the visualizations, defined by the EU Tax Observatory, that
+are needed in the Taxplorer tool. The functions below are used on different pages of the website.
+"""
+
import pandas as pd
import numpy as np
import plotly.express as px
@@ -5,89 +10,105 @@
import humanize
from wordcloud import WordCloud, get_single_color_func
+
+# Define custom template
+custom_template = {
+ "layout": {
+ "autosize": True,
+ "plot_bgcolor": "white",
+ "font": {"family": "Roboto, sans-serif"},
+ "title": None,
+ "margin": dict(l=0, r=0, b=0, t=0),
+ }
+}
+
+
# Define color sequence for plots
COLOR_SEQUENCE = ["#D9D9D9", "#1E2E5C"]
-# TODO add viz comment
-# Viz 1 -
-def number_of_tracked_reports(df):
- number_of_tracked_reports = len(df.groupby(["year", "mnc"])["mnc"])
- return number_of_tracked_reports
+# Viz 1 : Number of tracked reports
+def number_of_tracked_reports(
+ df: pd.DataFrame, filter_name: str = None, filter_value: str = None
+) -> int:
+ """Calculate the number of tracked reports with possibility to filter on company name, sector or headquarter
+ location.
-# TODO add viz comment
-def number_of_tracked_reports_company(df_selected_company):
- number_of_tracked_reports_company = len(
- df_selected_company.groupby(["year"])["year"]
- )
- return number_of_tracked_reports_company
+ Args:
+ df (pd.DataFrame): CbCRs database.
+ filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None.
+ filter_value (str, optional): Value to filter with. Defaults to None.
+
+ Returns:
+ int: number of tracked reports.
+ """
+ # Initialise available filters
+ filter_values = [None, "mnc", "sector", "upe_name"]
-def number_of_tracked_mnc(df: pd.DataFrame) -> int:
- return df["mnc"].nunique()
+ # Raise an error if "filter_name" is not one of the available filters
+ if filter_name not in filter_values:
+ raise ValueError(f"Filter '{filter_name}' is not a valid filter.")
+ # Compute number of reports
+ if filter_name:
+ n_reports = (
+ df.loc[df[filter_name] == filter_value].groupby("mnc")["year"].nunique().sum()
+ )
+ else:
+ n_reports = df.groupby("mnc")["year"].nunique().sum()
-# TODO add viz comment
-def number_of_tracked_reports_sector(df_selected_sector):
- number_of_tracked_reports_sector = len(
- df_selected_sector.groupby(["year", "mnc"])["year"]
- )
- return number_of_tracked_reports_sector
+ return int(n_reports)
-# TODO add viz comment
-def number_of_tracked_reports_country(df_selected_country):
- number_of_tracked_reports_country = len(
- df_selected_country.groupby(["year", "mnc"])["year"]
- )
- return number_of_tracked_reports_country
+# Viz 2 : Number of tracked reports over time
+def number_of_tracked_reports_over_time(
+ df: pd.DataFrame, filter_name: str = None, filter_value: str = None
+) -> go.Figure:
+ """Compute and plot the number of tracked reports over time with possibility to filter on company name, sector or
+ headquarter location.
+ Args:
+ df (pd.DataFrame): CbCRs database.
+ filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None.
+ filter_value (str, optional): Value to filter with. Defaults to None.
-# TODO add viz comment
-# Viz 2 - Number of tracked reports over time
-def number_of_tracked_reports_over_time(df):
- df_count = df.groupby(["year"])["mnc"].nunique().reset_index()
- return df_count
+ Returns:
+ go.Figure: number of tracked reports over time in a Plotly figure.
+ """
+ # Initialise available filters
+ filter_values = [None, "mnc", "sector", "upe_name"]
-def display_number_of_tracked_reports_over_time(df):
- # Calculate number of companies per year
- data = number_of_tracked_reports_over_time(df=df)
+ # Raise an error if "filter_name" is not one of the available filters
+ if filter_name not in filter_values:
+ raise ValueError(f"Filter '{filter_name}' is not a valid filter.")
- # Bar color sequence
- bar_color = '#D9D9D9'
+ # Compute number of reports
+ if filter_name:
+ data = (
+ df.loc[df[filter_name] == filter_value]
+ .groupby("year")["mnc"]
+ .nunique()
+ .reset_index()
+ )
+ else:
+ data = df.groupby("year")["mnc"].nunique().reset_index()
# Create figure
fig = px.bar(
- data,
- x='year',
- y='mnc',
- color_discrete_sequence=[bar_color],
- text_auto=True
+ data, x="year", y="mnc", text_auto=True, color_discrete_sequence=COLOR_SEQUENCE
)
# Force position and color of bar values
- fig.update_traces(
- textposition='outside', textfont=dict(color='black')
- )
+ fig.update_traces(textposition="outside", textfont=dict(color="black"))
+
+ # Define axes settings
+ fig.update_xaxes(title=None, tickvals=data["year"].unique())
+ fig.update_yaxes(title=None, visible=False, range=[0, data["mnc"].max() * 1.1])
# Update layout settings
- fig.update_layout(
- autosize=True,
- height=360,
- font_family='Roboto',
- title=None,
- xaxis=dict(
- title=None,
- tickvals=data['year'].unique()
- ),
- yaxis=dict(
- title=None,
- visible=False,
- ),
- plot_bgcolor='white',
- margin=dict(l=0, r=0, b=0, t=0)
- )
+ fig.update_layout(template=custom_template, height=360)
# Define style of hover on bars
fig.update_traces(
@@ -97,328 +118,214 @@ def display_number_of_tracked_reports_over_time(df):
return go.Figure(fig)
-# TODO add viz comment
-def number_of_tracked_reports_over_time_company(df_selected_company):
- df_count_company = (
- df_selected_company.groupby(["year"])["mnc"].nunique().reset_index()
- )
- # df_count_all_company = df.groupby(["year"])["mnc"].nunique().reset_index()
-
- # row[3].line_chart(df_count_all_company, x="year", y="mnc")
-
- # row[4].write("selected sector")
- # row[4].write(
- # "df_selected_sector.groupby(['year'])['mnc'].nunique().reset_index()"
- # )
- return df_count_company
-
-
-# TODO add viz comment
-def number_of_tracked_reports_over_time_sector(df_selected_sector):
- df_count_sector = (
- df_selected_sector.groupby(["year"])["mnc"].nunique().reset_index()
- )
-
- # df_count_all_sector = (
- # df.groupby(["year", "sector"])["mnc"].nunique().reset_index()
- # )
-
- # row[4].line_chart(df_count_all_sector, x="year", y="mnc", color="sector")
-
- # row[5].write("selected country")
- # row[5].write(
- # "df_selected_country.groupby(['year'])['mnc'].nunique().reset_index()"
- # )
- return df_count_sector
+# Viz 3 : Number of tracked mnc
+def number_of_tracked_mnc(
+ df: pd.DataFrame, filter_name: str = None, filter_value: str = None
+) -> int:
+ """Calculate the number of tracked companies (MNCs) with the possibility to filter on sector or headquarter
+ location.
+ Args:
+ df (pd.DataFrame): CbCRs database.
+ filter_name (str, optional): Filter to apply, could be "sector" or "upe_name". Defaults to None.
+ filter_value (str, optional): Value to filter with. Defaults to None.
-# TODO add viz comment
-def number_of_tracked_reports_over_time_country(df_selected_country):
- df_count_country = (
- df_selected_country.groupby(["year"])["mnc"].nunique().reset_index()
- )
- # df_count_all_country = (
- # df.groupby(["year", "jur_name"])["mnc"].nunique().reset_index()
- # )
-
- # row[5].line_chart(df_count_all_country, x="year", y="mnc", color="jur_name")
- return df_count_country
-
-
-# Viz 16
-
-# company’s % pre-tax profit and profit per employee
-# plot chart : x-axis = % profit, y axis = profit / employee
-# size of the bubble based on % profit and a color code for
-# tax havens vs others
-def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_company):
- # pretax_profit_col_name = 'profit_before_tax'
- profit_col_name = ''
- employee_col_name = 'employees'
- df_selected_company[profit_col_name] / df_selected_company[employee_col_name]
-
-
-# Viz 19
-# what are the tax havens being used by the company
-# to test but could be a table with one row per jurisdiction (filtering on TH) with
-# % profit
-# % employee
-# profit per employee
-# % related party revenue
-# for domestic vs tax havens vs. non havens
-def tax_haven_used_by_company(df_selected_company):
- company_upe_code = df_selected_company['upe_code'].unique()[0]
- pc_list = ['employees', 'profit_before_tax', 'related_revenues']
- # grouper = df_selected_company.groupby('jur_name')
-
- df = pd.DataFrame(df_selected_company)
-
- df_domestic_company = df[df['jur_code'] == company_upe_code]
- df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH']
- df_selected_company_nth = df[df['jur_tax_haven'] == 'not.TH']
-
- for col in pc_list:
-
- df.insert(
- len(df_selected_company.columns),
- col + '_domestic_sum',
- df_domestic_company[col].sum())
-
- df.insert(
- len(df_selected_company.columns),
- col + '_th_sum',
- df_selected_company_th[col].sum())
-
- df.insert(
- len(df.columns),
- col + '_nth_sum',
- df_selected_company_nth[col].sum())
-
- df.insert(
- len(df.columns),
- col + '_sum',
- df_selected_company[col].sum())
+ Returns:
+ int: number of companies in the database.
+ """
- df.insert(
- len(df.columns),
- col + '_pc',
- 100 * df[col] / df[col + '_sum'])
- # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum']
+ # Initialise available filters
+ filter_values = [None, "sector", "upe_name"]
- df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH']
- df_selected_company_th_agg = df_selected_company_th.groupby(['mnc', 'jur_name']).agg(
- profit_before_tax=('profit_before_tax', 'sum'),
- profit_before_tax_pc=('profit_before_tax_pc', 'sum'),
- employees_pc=('employees_pc', 'sum'),
- employees=('employees', 'sum'),
- related_revenues_pc=('related_revenues_pc', 'sum')
- )
- df_selected_company_th_agg = df_selected_company_th_agg.reset_index()
- df_selected_company_th_agg['profit per employee'] = \
- df_selected_company_th_agg['profit_before_tax'] / df_selected_company_th_agg['employees']
- df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace(
- [np.inf, -np.inf], None)
+ # Raise an error if "filter_name" is not one of the available filters
+ if filter_name not in filter_values:
+ raise ValueError(f"Filter '{filter_name}' is not a valid filter.")
- return df_selected_company, df_selected_company_th_agg
+ # Compute number of companies
+ if filter_name:
+ n_company = df.loc[df[filter_name] == filter_value, "mnc"].nunique()
+ else:
+ n_company = df["mnc"].nunique()
+ return int(n_company)
-# TODO add viz comment
-# complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens)
-def company_table(df_selected_company):
- # company_upe_code = df_selected_company['upe_code'].unique()[0]
- pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', 'related_revenues', 'total_revenues', 'tax_paid']
-
- df = pd.DataFrame(df_selected_company)
- for col in pc_list:
- if col + '_sum' not in df.columns:
- df.insert(
- len(df.columns),
- col + '_sum',
- df[col].sum())
-
- df.insert(
- len(df.columns),
- col + '_pc',
- 100 * df[col] / df[col + '_sum'])
- # f_selected_company[col + '_sum'] = df_selected_company[col].sum()
- # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum']
- # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens)
- df_selected_company_by_jur = df.groupby(['mnc', 'jur_name']).agg(
- related_revenues_pc=('related_revenues_pc', 'sum'),
- unrelated_revenues=('unrelated_revenues', 'sum'),
- total_revenues=('total_revenues', 'sum'),
- profit_before_tax=('profit_before_tax', 'sum'),
- employees_pc=('employees_pc', 'sum'),
- tax_paid=('tax_paid', 'sum'),
- tax_paid_pc=('tax_paid_pc', 'sum'),
+# Viz 4 : Breakdown of reports by sector
+def breakdown_of_reports_by_sector(df: pd.DataFrame) -> go.Figure:
+ # Count the number of unique companies for each sector and year
+ df_reports_per_sector_year = (
+ df.groupby(["sector", "year"])["mnc"].nunique().reset_index(name="unique_company_count")
)
- return df_selected_company_by_jur.reset_index()
-
-
-# Viz 4 - Breakdown of reports by sector (pie chart)
-def breakdown_of_reports_by_sector(df):
- #Dataframe called df
- df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index(
- name='unique_company_count')
# Aggregate the counts of unique companies across all years for each sector
- df_reports_per_sector = df_reports_per_sector_year.groupby('sector')['unique_company_count'].sum().reset_index()
+ df_reports_per_sector = (
+ df_reports_per_sector_year.groupby("sector")["unique_company_count"].sum().reset_index()
+ )
# Calculate the total count of unique companies across all sectors
- total_companies = df_reports_per_sector['unique_company_count'].sum()
+ total_companies = df_reports_per_sector["unique_company_count"].sum()
# Calculate the percentage of each sector's count relative to the total count and round to 2 decimals
- df_reports_per_sector['percent'] = ((df_reports_per_sector['unique_company_count'] / total_companies) * 100).round(
- 2)
+ df_reports_per_sector["percent"] = (
+ (df_reports_per_sector["unique_company_count"] / total_companies) * 100
+ ).round(2)
# Sort the DataFrame by the count of unique companies in ascending order
- df_reports_per_sector = df_reports_per_sector.sort_values(by='unique_company_count', ascending=True)
+ df_reports_per_sector = df_reports_per_sector.sort_values(
+ by="unique_company_count", ascending=True
+ )
- return df_reports_per_sector
+ # Plotting the horizontal bar chart with Plotly Express
+ fig = px.bar(
+ df_reports_per_sector,
+ y="sector",
+ x="percent",
+ orientation="h", # Horizontal orientation
+ labels={"percent": "Percentage of Companies (%)", "sector": "Sector"},
+ text="percent", # Show the percentage as text label
+ hover_data={
+ "unique_company_count": True,
+ "percent": ":.2f%",
+ }, # Add tooltip for count and rounded percentage
+ )
+ # Update layout settings
+ fig.update_layout(template=custom_template)
-def breakdown_of_reports_by_sector_viz(df_reports_per_sector):
- # Plotting the horizontal bar chart with Plotly Express
- fig = px.bar(df_reports_per_sector, y='sector', x='percent',
- orientation='h', # Horizontal orientation
- title='Breakdown of Reports by Sector (All Years)',
- labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'},
- text='percent', # Show the percentage as text label
- hover_data={'unique_company_count': True, 'percent': ':.2f%'},
- # Add tooltip for count and rounded percentage
- )
-
- # Update layout to display the title above the chart
- fig.update_layout(title='Breakdown of Reports by Sector',
- title_x=0.5, title_y=0.9, # Adjust position
- title_font_size=20) # Adjust font size
-
- # Show the horizontal bar chart
return go.Figure(fig)
-# Viz 5 - Breakdown of reports by HQ country (pie chart)
-def breakdown_of_reports_by_hq_country(df):
- # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ country and year
- df_reports_per_country_year = df.groupby(['upe_name', 'year'])['mnc'].nunique().reset_index(
- name='unique_company_count')
+# Viz 5 : Breakdown of reports by hq country
+def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure:
+ # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ
+ # country and year
+ df_reports_per_country_year = (
+ df.groupby(["upe_name", "year"])["mnc"]
+ .nunique()
+ .reset_index(name="unique_company_count")
+ )
# Aggregate the counts of unique companies across all years for each HQ country
- df_reports_per_country = df_reports_per_country_year.groupby('upe_name')['unique_company_count'].sum().reset_index()
+ df_reports_per_country = (
+ df_reports_per_country_year.groupby("upe_name")["unique_company_count"]
+ .sum()
+ .reset_index()
+ )
# Calculate the total count of unique companies across all HQ countries
- total_companies = df_reports_per_country['unique_company_count'].sum()
+ total_companies = df_reports_per_country["unique_company_count"].sum()
# Calculate the percentage of each HQ country's count relative to the total count and round to 2 decimals
- df_reports_per_country['percent'] = (
- (df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2)
+ df_reports_per_country["percent"] = (
+ (df_reports_per_country["unique_company_count"] / total_companies) * 100
+ ).round(2)
# Sort the DataFrame by the count of unique companies in ascending order
- df_reports_per_country = df_reports_per_country.sort_values(by='unique_company_count', ascending=True)
-
- return df_reports_per_country
-
+ df_reports_per_country = df_reports_per_country.sort_values(
+ by="unique_company_count", ascending=True
+ )
-def breakdown_of_reports_by_hq_country_viz(df_reports_per_country):
# Plotting the horizontal bar chart with Plotly Express
- fig = px.bar(df_reports_per_country, y='upe_name', x='percent',
- orientation='h', # Horizontal orientation
- title='Breakdown of Reports by HQ Country over Time',
- labels={'percent': 'Percentage of Companies (%)', 'upe_name': 'HQ Country'},
- text='percent', # Show the percentage as text label
- hover_data={'unique_company_count': True, 'percent': ':.2f%'},
- # Add tooltip for count and rounded percentage
- )
-
- # Update layout to display the title above the chart
- fig.update_layout(title='Breakdown of Reports by HQ Country over Time',
- title_x=0.5, title_y=0.95, # Adjust position
- title_font_size=20) # Adjust font size
-
- # Show the horizontal bar chart
- # fig.show()
- return go.Figure(fig)
-
+ fig = px.bar(
+ df_reports_per_country,
+ y="upe_name",
+ x="percent",
+ orientation="h", # Horizontal orientation
+ labels={"percent": "Percentage of Companies (%)", "upe_name": "HQ Country"},
+ text="percent", # Show the percentage as text label
+ hover_data={"unique_company_count": True, "percent": ":.2f%"},
+ # Add tooltip for count and rounded percentage
+ )
-## Viz 6 - Breakdown of reports by sector over time (bar chart)
+ # Update layout settings
+ fig.update_layout(template=custom_template)
+ return go.Figure(fig)
-def breakdown_of_reports_by_sector_over_time(df):
- # df_reports_per_sector_over_time = df
- # return df_reports_per_sector_over_time
+# Viz 6 : Breakdown of reports by sector over time
+def breakdown_of_reports_by_sector_over_time(df: pd.DataFrame) -> go.Figure:
# Step 1: Determine the top 10 sectors that released reports
- top_10_sectors = df['sector'].value_counts().nlargest(10).index.tolist()
+ top_10_sectors = df["sector"].value_counts().nlargest(10).index.tolist()
# Step 2: Group all other sectors as "Others"
- df['Sectors'] = df['sector'].apply(lambda x: x if x in top_10_sectors else 'Others')
+ df["Sectors"] = df["sector"].apply(lambda x: x if x in top_10_sectors else "Others")
# Step 3: Group the DataFrame by 'year', 'Sectors', and count the number of unique companies for each year and sector
- df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index(
- name='unique_company_count')
+ df_reports_per_year_sector = (
+ df.groupby(["year", "Sectors"])["mnc"]
+ .nunique()
+ .reset_index(name="unique_company_count")
+ )
# Sort sectors alphabetically
- df_reports_per_year_sector = df_reports_per_year_sector.sort_values(by='Sectors', ascending=False)
-
- return df_reports_per_year_sector, top_10_sectors
-
+ df_reports_per_year_sector = df_reports_per_year_sector.sort_values(
+ by="Sectors", ascending=False
+ )
-def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors):
# Define the order of sectors for the stacked bar chart and legend, reversed
- chart_order = ['Others'] + top_10_sectors[::-1]
- legend_order = ['Others'] + top_10_sectors[::-1]
+ chart_order = ["Others"] + top_10_sectors[::-1]
+ legend_order = ["Others"] + top_10_sectors[::-1]
# Plotting the bar chart using Plotly Express
- fig = px.bar(df_reports_per_year_sector, x='year', y='unique_company_count', color='Sectors',
- title='Breakdown of Reports by Sector over Time',
- labels={'unique_company_count': 'Number of Companies Reporting', 'year': 'Year'},
- barmode='stack',
- category_orders={'Sectors': chart_order})
+ fig = px.bar(
+ df_reports_per_year_sector,
+ x="year",
+ y="unique_company_count",
+ color="Sectors",
+ labels={"unique_company_count": "Number of Companies Reporting", "year": "Year"},
+ barmode="stack",
+ category_orders={"Sectors": chart_order},
+ )
- # Reverse the order of legend items
- fig.update_layout(legend=dict(traceorder='reversed'))
+ # Update layout settings
+ fig.update_layout(template=custom_template, legend=dict(traceorder="reversed"))
# Adjusting the legend order and formatting the legend labels
for i, trace in enumerate(fig.data):
trace.name = legend_order[i]
# Change color of the "Others" bar to grey
- if trace.name == 'Others':
- trace.marker.color = 'grey'
+ if trace.name == "Others":
+ trace.marker.color = "grey"
- # Show the plot
- # fig.show()
return go.Figure(fig)
-## Viz 7 - Breakdown of reports by HQ country over time (bar chart)
+# Viz 7 : Breakdown of reports by hq country over time
# TODO add code
-## Viz 8 - Breakdown of MNC by sector (pie chart - changed to bar chart for more visibility)
+
+# Viz 8 : Breakdown of MNC by sector
# TODO add code
-## Viz 9 - Breakdown of MNC by HQ country (pie chart - changed to bar chart for more visibility)
+
+# Viz 9 : Breakdown of MNC by HQ country
# TODO add code
-## Viz 10/11 - Breakdown of MNC by sector
+
+# Viz 10/11 : Breakdown of MNC by sector
# TODO add code
-## Viz 11 - Breakdown of MNC by HQ country
+
+# Viz 11 : Breakdown of MNC by HQ country
# TODO add code
-# Viz 12 - available reports by company
-def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict:
- """Compute the number of reports tracked for a specific company and the
- available fiscal years.
+
+# Viz 12 : available reports by company
+def company_available_reports(
+ df: pd.DataFrame, company: str, hide_company: bool = True
+) -> pd.DataFrame:
+ """Compute the number of reports tracked and the available fiscal years for a specific company.
Args:
df (pd.DataFrame): CbCRs database.
- company (str): company name.
+ company (str): Company name.
+ hide_company (bool, optional): Hide company name in final table. Defaults to True.
Returns:
- dict: numbers of reports and fiscal years.
+ pd.DataFrame: numbers of reports and fiscal years in a table.
"""
- available_years = df.loc[df['mnc'] == company, 'year'].unique()
+ available_years = df.loc[df["mnc"] == company, "year"].unique()
n_reports = len(available_years)
# Convert type of items from 'int' to 'str' in available years list
@@ -428,467 +335,342 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict:
if len(years_string_list) == 1:
years_string = years_string_list[0]
elif len(years_string_list) > 1:
- years_string = ', '.join(years_string_list[:-1])
- years_string += ' and ' + years_string_list[-1]
-
- # Create a dictionnary with the results
- data = {
- 'Company': company,
- 'Reports': n_reports,
- 'Fiscal year(s) available': years_string
- }
-
- return data
-
-
-def display_company_available_reports(
- df: pd.DataFrame, company: str, hide_company: bool = True) -> pd.DataFrame:
- """Display the number of reports tracked for a specific company and the
- available fiscal years.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): company name.
- hide_company (bool, optional): hide company name in final table. Defaults to True.
-
- Returns:
- pd.DataFrame: numbers of reports and fiscal years.
- """
-
- # Compute data
- data = compute_company_available_reports(df=df, company=company)
+ years_string = ", ".join(years_string_list[:-1])
+ years_string += " and " + years_string_list[-1]
# Create the table
- df = pd.DataFrame.from_dict(data=data, orient='index')
+ table = pd.DataFrame(
+ data=[company, n_reports, years_string], index=["Company", "Reports", "Fiscal year(s)"]
+ )
+ # Hide the company name in the table
if hide_company:
- return df[1:].style.hide(axis='columns')
+ return table[1:].style.hide(axis="columns")
- return df.style.hide(axis='columns')
+ return table.style.hide(axis="columns")
-# Viz 13 - company key financials kpis
-def compute_company_key_financials_kpis(
- df: pd.DataFrame,
- company: str,
- year: int = None) -> dict:
- """Compute key financial KPIs for a company.
+# Viz 13 : Company key financials kpis
+def company_key_financials_kpis(df: pd.DataFrame, company: str, year: int = None) -> dict:
+ """Compute key financial KPIs for a company in a table.
Args:
df (pd.DataFrame): CbCRs database.
- company (str): Company name
+ company (str): company name.
year (int, optional): fiscal year to filter the results with. Defaults to None.
Returns:
- dict: company key financial KPIs.
+ pd.DataFrame: table with company key financial KPIs.
"""
- kpis_list = ['total_revenues', 'unrelated_revenues', 'related_revenues',
- 'profit_before_tax', 'tax_paid', 'employees']
+ kpis_list = [
+ "total_revenues",
+ "unrelated_revenues",
+ "related_revenues",
+ "profit_before_tax",
+ "tax_paid",
+ "employees",
+ ]
- years_list = df.loc[df['mnc'] == company, 'year'].unique()
+ years_list = df.loc[df["mnc"] == company, "year"].unique()
# Compute sum of kpis
if not year or year not in years_list:
- df = (df.loc[df['mnc'] == company]
- .groupby(['year', 'upe_name'], as_index=False)[kpis_list]
- .sum()
- )
+ df = (
+ df.loc[df["mnc"] == company]
+ .groupby(["year", "upe_name"], as_index=False)[kpis_list]
+ .sum()
+ )
else:
- df = (df.loc[(df['mnc'] == company) & (df['year'] == year)]
- .groupby(['year', 'upe_name'], as_index=False)[kpis_list]
- .sum())
-
- # df = df.set_index('year')
+ df = (
+ df.loc[(df["mnc"] == company) & (df["year"] == year)]
+ .groupby(["year", "upe_name"], as_index=False)[kpis_list]
+ .sum()
+ )
# Make financial numbers easily readable with 'humanize' package
for column in df.columns:
- if column not in ['employees', 'upe_name']:
+ if column not in ["employees", "upe_name"]:
df[column] = df[column].apply(
- lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x)
- df[column] = '€ ' + df[column]
- elif column == 'employees':
+ lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x
+ )
+ df[column] = "€ " + df[column]
+ elif column == "employees":
df[column] = df[column].astype(int)
# Remove 'upe_name' and 'year''
- df = df.drop(columns=['upe_name', 'year'])
+ df = df.drop(columns=["upe_name", "year"])
# Clean columns string
- df.columns = df.columns.str.replace('_', ' ').str.capitalize()
-
- # Create a dictionary with the results
- data = df.to_dict(orient='index')
-
- return data
-
-
-def display_company_key_financials_kpis(
- df: pd.DataFrame, company: str, year: int = None):
- """Display key financial KPIs for a company.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): Company name
- year (int, optional): fiscal year to filter the results with. Defaults to None.
-
- Returns:
- pd.DataFrame: company key financial KPIs.
- """
+ df.columns = df.columns.str.replace("_", " ").str.capitalize()
- # Compute data
- data = compute_company_key_financials_kpis(df=df, company=company, year=year)
-
- # Create the table
- df = pd.DataFrame.from_dict(data)
- df = df.reset_index()
+ # Transpose DataFrame
+ df = df.T.reset_index()
# Rename columns
- df = df.rename(columns={'index': 'Variable', 0: 'Value'})
+ df = df.rename(columns={"index": "Variable", 0: "Value"})
# Replace 0 values with 'N/A'
- df.loc[df['Value'] == '€ 0', 'Value'] = 'N/A'
+ df.loc[df["Value"] == "€ 0", "Value"] = "N/A"
return df
-# Viz 14
-def compute_top_jurisdictions_revenue(
- df: pd.DataFrame, company: str, year: int) -> dict:
- """Rank jurisdictions on their percentage of total revenues.
+# Viz 14 : company top jurisdictions for revenue
+def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.Figure:
+ """Compute and plot top jurisdictions on their percentage of total revenues.
Args:
df (pd.DataFrame): CbCRs database.
company (str): Company name
- year (int): fiscal year.
+ year (int): Fiscal year.
Returns:
- dict: Rank of jurisdictions by percentage of total revenues.
+ go.Figure: Jurisdictions by percentage of total revenues in a Plotly figure.
"""
df = df.loc[
- (df['mnc'] == company) & (df['year'] == year),
- ['jur_name', 'related_revenues', 'unrelated_revenues', 'total_revenues']
+ (df["mnc"] == company) & (df["year"] == year),
+ ["jur_name", "related_revenues", "unrelated_revenues", "total_revenues"],
]
# Calculate missing values in 'total_revenues' if 'related_revenues' and
# 'unrelated_revenues' are available
df.loc[
- df['related_revenues'].notna()
- & df['unrelated_revenues'].notna()
- & df['total_revenues'].isna(),
- 'total_revenues'
- ] = df['related_revenues'] + df['unrelated_revenues']
+ df["related_revenues"].notna()
+ & df["unrelated_revenues"].notna()
+ & df["total_revenues"].isna(),
+ "total_revenues",
+ ] = df["related_revenues"] + df["unrelated_revenues"]
# Subset DataFrame
- df = df[['jur_name', 'total_revenues']]
+ df = df[["jur_name", "total_revenues"]]
# Remove rows where 'total_revenues' is missing
- df = df.dropna(subset=['total_revenues'])
+ df = df.dropna(subset=["total_revenues"])
# Compute percentage of revenue
- df['total_revenues_%'] = df['total_revenues'] / df['total_revenues'].sum()
-
- # Convert DataFrame to dictionnary
- data = df.to_dict()
-
- return data
-
-
-def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int):
- """Display jurisdictions by percentage of total revenues in an
- horizontal bar chart.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): Company name
- year (int): fiscal year.
- """
-
- # Compute data
- data = compute_top_jurisdictions_revenue(df=df, company=company, year=year)
-
- # Create DataFrame
- df = pd.DataFrame.from_dict(data)
- df = df.sort_values(by='total_revenues_%')
+ df["total_revenues_%"] = df["total_revenues"] / df["total_revenues"].sum()
- # Bar color sequence
- bar_color = '#D9D9D9'
+ # Sort jurisdictions by percentage of total revenues
+ df = df.sort_values(by="total_revenues_%")
# Create figure
fig = px.bar(
df,
- x='total_revenues_%',
- y='jur_name',
- orientation='h',
- color_discrete_sequence=[bar_color],
- text_auto='.1%'
+ x="total_revenues_%",
+ y="jur_name",
+ orientation="h",
+ text_auto=".1%",
+ color_discrete_sequence=COLOR_SEQUENCE,
)
# Set figure height (min. 480) depending on the number of jurisdictions
- fig_height = max(480, (48 * len(df['jur_name'])))
+ fig_height = max(480, (48 * len(df["jur_name"])))
+
+ # Update axis layout
+ fig.update_xaxes(title="Percentage of total revenue", tickformat=".0%")
+ fig.update_yaxes(title=None)
# Update layout settings
- fig.update_layout(
- font_family='Roboto',
- xaxis=dict(
- title='Percentage of total revenue',
- tickformat='.0%'
- ),
- yaxis_title=None,
- plot_bgcolor='white',
- height=fig_height,
- margin=dict(l=0, r=0, t=0, b=0)
- )
+ fig.update_layout(template=custom_template, height=fig_height)
# Define position of text values
values_positions = [
- 'outside' if value <= 0.05 else 'inside' for value in df['total_revenues_%']]
+ "outside" if value <= 0.05 else "inside" for value in df["total_revenues_%"]
+ ]
- fig.update_traces(
- textangle=0,
- textposition=values_positions,
- selector=dict(name='')
- )
+ fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name=""))
# Define style of hover on bars
fig.update_traces(
- hovertemplate=(
- "%{hovertext}
% revenue: %{x:.3%}
"
- ),
- hovertext=df['jur_name']
+ hovertemplate=("%{hovertext}
% revenue: %{x:.3%}
"),
+ hovertext=df["jur_name"],
)
return go.Figure(fig)
-# Viz 15
-def compute_pretax_profit_and_employees_rank(
- df: pd.DataFrame, company: str, year: int) -> pd.DataFrame:
- """Compute jurisdictions percentage of profit before tax and percentage
- of employees and rank by percentage of profit.
+# Viz 15 : company’s % pre-tax profit and % employees by jurisdiction
+def pretax_profit_and_employees_rank(df: pd.DataFrame, company: str, year: int) -> go.Figure:
+ """Compute and plot jurisdictions percentage of profit before tax and percentage of employees then rank by
+ percentage of profit.
Args:
df (pd.DataFrame): CbCRs database.
company (str): Company name
- year (int): fiscal year.
+ year (int): Fiscal year.
Returns:
- dict: rank of jurisdictions with percentage of profit before and percentage
- of employees.
+ go.Figure: rank of jurisdictions with percentage of profit before tax and percentage of employees in a Plotly
+ figure.
"""
# Filter rows with selected company/year and subset with necessary features
- features = ['jur_name', 'profit_before_tax', 'employees']
- df = df.loc[(df['mnc'] == company) & (df['year'] == year), features]
+ features = ["jur_name", "profit_before_tax", "employees"]
+ df = df.loc[(df["mnc"] == company) & (df["year"] == year), features]
# Keep only profitable jurisdictions
- df = df.loc[df['profit_before_tax'] >= 0]
+ df = df.loc[df["profit_before_tax"] >= 0]
# Sort jurisdictions by profits
- df = df.sort_values(by='profit_before_tax').reset_index(drop=True)
+ df = df.sort_values(by="profit_before_tax").reset_index(drop=True)
# Calculate percentages
- df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum()
- df['employees_%'] = df['employees'] / df['employees'].sum()
- df = df.drop(columns=['profit_before_tax', 'employees'])
-
- # data = df.to_dict()
-
- return df
-
-
-def display_pretax_profit_and_employees_rank(
- df: pd.DataFrame, company: str, year: int) -> go.Figure:
- """Display rank of jurisdictions by percentage of profit before and percentage
- of employees.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): Company name
- year (int): fiscal year.
- """
-
- # Compute data
- df = compute_pretax_profit_and_employees_rank(df=df, company=company, year=year)
-
- # Create DataFrame
- # df = pd.DataFrame(data)
+ df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum()
+ df["employees_%"] = df["employees"] / df["employees"].sum()
+ df = df.drop(columns=["profit_before_tax", "employees"])
# Rename columns
- df = df.rename(columns={
- 'profit_before_tax_%': '% profit',
- 'employees_%': '% employees'
- })
-
- # Bar color sequence
- bar_colors = ['#D9D9D9', '#1E2E5C']
+ df = df.rename(columns={"profit_before_tax_%": "% profit", "employees_%": "% employees"})
# Create figure
fig = px.bar(
df,
- x=['% employees', '% profit'],
- y='jur_name',
- barmode='group',
- orientation='h',
- text_auto='.1%',
- color_discrete_sequence=bar_colors
+ x=["% employees", "% profit"],
+ y="jur_name",
+ barmode="group",
+ orientation="h",
+ text_auto=".1%",
+ color_discrete_sequence=COLOR_SEQUENCE,
)
# Set figure height (min. 640) depending on the number of jurisdictions
- fig_height = max(480, (48 * len(df['jur_name'])))
+ fig_height = max(480, (48 * len(df["jur_name"])))
# Set maximum value for x axis
- if not df[['% profit', '% employees']].isna().all().all():
- max_x_value = max(df[['% profit', '% employees']].max(axis='columns')) + 0.1
+ if not df[["% profit", "% employees"]].isna().all().all():
+ max_x_value = max(df[["% profit", "% employees"]].max(axis="columns")) + 0.1
else:
max_x_value = 1
+ # Update axis layout
+ fig.update_xaxes(title=None, tickformat=".0%", range=[0, max_x_value])
+ fig.update_yaxes(title=None)
+
# Update layout settings
fig.update_layout(
- font_family='Roboto',
- title=None,
- xaxis=dict(
- title=None,
- tickformat='.0%',
- range=[0, max_x_value]
- ),
- yaxis_title=None,
legend=dict(
- x=0.1,
- y=1.05,
- xanchor='center',
- yanchor='top',
- title=dict(text=''),
- orientation='h'
+ orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="")
),
- plot_bgcolor='white',
+ template=custom_template,
height=fig_height,
- margin=dict(l=0, r=0, t=10, b=0)
)
# Add annotations for NaN values where there should have been a bar
for index, row in df.iterrows():
- if pd.isna(row['% employees']):
+ if pd.isna(row["% employees"]):
fig.add_annotation(
- xanchor='left',
+ xanchor="left",
x=0.001,
y=df.index[index],
yshift=-10,
- text='Information not provided',
+ text="Information not provided",
showarrow=False,
- font=dict(size=12)
+ font=dict(size=12),
)
- if pd.isna(row['% profit']):
+ if pd.isna(row["% profit"]):
fig.add_annotation(
- xanchor='left',
+ xanchor="left",
x=0.001,
y=df.index[index],
yshift=10,
- text='Information not provided',
+ text="Information not provided",
showarrow=False,
- font=dict(size=12)
+ font=dict(size=12),
)
# Loop through each bar trace and hide the text if the value is NaN
for trace in fig.data:
values = df[trace.name]
- text_position = ['outside' if not np.isnan(value) else 'none' for value in values]
+ text_position = ["outside" if not np.isnan(value) else "none" for value in values]
trace.textposition = text_position
- if trace.name == '% employees':
- trace.hovertemplate = '%{y}
Employees : %{x:.3%}'
- elif trace.name == '% profit':
- trace.hovertemplate = '%{y}
Profit : %{x:.3%}'
+ if trace.name == "% employees":
+ trace.hovertemplate = "%{y}
Employees : %{x:.3%}"
+ elif trace.name == "% profit":
+ trace.hovertemplate = "%{y}
Profit : %{x:.3%}"
return go.Figure(fig)
-# Viz 16
-def compute_pretax_profit_and_profit_per_employee(
- df: pd.DataFrame, company: str, year: int) -> pd.DataFrame:
+# Viz 16 : company’s % pre-tax profit and profit per employee
+def pretax_profit_and_profit_per_employee(
+ df: pd.DataFrame, company: str, year: int
+) -> go.Figure:
+ """Compute and plot jurisdictions percentage of profit before tax and profit per employee.
+
+ Args:
+ df (pd.DataFrame): CbCRs database.
+ company (str): Company name
+ year (int): Fiscal year.
+
+ Returns:
+ go.Figure: Percentage of profit and profit/employee in a Plotly Figure.
+ """
+
# Filter rows with selected company/year and subset with necessary features
- features = ['jur_name', 'profit_before_tax', 'employees', 'jur_tax_haven']
- df = df.loc[(df['mnc'] == company) & (df['year'] == year), features]
+ features = ["jur_name", "profit_before_tax", "employees", "jur_tax_haven"]
+ df = df.loc[(df["mnc"] == company) & (df["year"] == year), features]
# Keep only profitable jurisdictions
- df = df.loc[df['profit_before_tax'] >= 0]
+ df = df.loc[df["profit_before_tax"] >= 0]
# Sort jurisdictions by profits
- df = df.sort_values(by='profit_before_tax').reset_index(drop=True)
+ df = df.sort_values(by="profit_before_tax").reset_index(drop=True)
# Replace 0 employees by 1
- df.loc[df['employees'] == 0, 'employees'] = 1
+ df.loc[df["employees"] == 0, "employees"] = 1
# Calculate percentages
- df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum()
- df['profit_per_employee'] = df['profit_before_tax'] / df['employees']
- df = df.drop(columns=['profit_before_tax', 'employees'])
-
- # print('compute_pretax_profit_and_profit_per_employee df.head():\n', df.head())
- # data = df.to_dict()
-
- return df
-
-
-def display_pretax_profit_and_profit_per_employee(df: pd.DataFrame, company: str, year: int) -> go.Figure:
- # Compute data
- df = compute_pretax_profit_and_profit_per_employee(df=df, company=company, year=year)
-
- # Create DataFrame
- # df = pd.DataFrame(data)
+ df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum()
+ df["profit_per_employee"] = df["profit_before_tax"] / df["employees"]
+ df = df.drop(columns=["profit_before_tax", "employees"])
# Replace bool values of Tax haven by string values
- df['jur_tax_haven'] = df['jur_tax_haven'].map({True: 'Tax haven', False: 'Non tax haven'})
-
+ df["jur_tax_haven"] = df["jur_tax_haven"].map({True: "Tax haven", False: "Non tax haven"})
+
# Create figure
fig = px.scatter(
df,
- x='profit_before_tax_%',
- y='profit_per_employee',
- size='profit_before_tax_%',
- color='jur_tax_haven',
+ x="profit_before_tax_%",
+ y="profit_per_employee",
+ size="profit_before_tax_%",
+ color="jur_tax_haven",
color_discrete_sequence=COLOR_SEQUENCE,
- custom_data=['jur_name']
+ custom_data=["jur_name"],
)
+ # Update axis layout
+ fig.update_xaxes(title="Percentage of profit", tickformat=".0%")
+ fig.update_yaxes(title="Profit per employee")
+
# Update layout settings
fig.update_layout(
- title=None,
- font_family='Roboto',
- autosize=True,
- height=360,
- xaxis=dict(
- title='% profit',
- tickformat='.0%',
- ),
- yaxis=dict(
- title='Profit/employee',
- ),
legend=dict(
- x=0.1,
- y=1.05,
- xanchor='center',
- yanchor='top',
- title=dict(text=''),
- orientation='h'),
- plot_bgcolor='white',
- margin=dict(l=0, r=0, t=0, b=0)
+ orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="")
+ ),
+ template=custom_template,
+ height=380,
)
-
-
+
# Define hover
fig.update_traces(
hovertemplate=f"{company} reports %{{x:.1%}} of profit and %{{y:.3s}}€ profits per employee in %{{customdata[0]}}"
)
-
+
return go.Figure(fig)
-# Viz 18
+# Viz 17 : company’s % pre-tax profit and % employees in TH vs domestic vs non TH
+# TODO add code
+
-def compute_related_and_unrelated_revenues_breakdown(
- df: pd.DataFrame, company: str, year: int) -> dict:
- """Compute related and unrelated revenues in tax heaven, non tax heaven and
+# Viz 18 : breakdown of revenue between related party and unrelated party in TH vs domestic vs non TH
+def related_and_unrelated_revenues_breakdown(
+ df: pd.DataFrame, company: str, year: int
+) -> go.Figure:
+ """Compute and plot related and unrelated revenues in tax haven, non tax haven and
domestic jurisdictions.
Args:
@@ -897,101 +679,75 @@ def compute_related_and_unrelated_revenues_breakdown(
year (int): fiscal year to filter the results with.
Returns:
- dict: revenues percentage for different type of jurisdictions.
+ go.Figure: related and unrelated revenues in a Plotly Figure.
"""
# Filter rows with selected company/year and subset with necessary features
- features = ['upe_code', 'jur_code', 'jur_name', 'jur_tax_haven',
- 'unrelated_revenues', 'related_revenues']
+ features = [
+ "upe_code",
+ "jur_code",
+ "jur_name",
+ "jur_tax_haven",
+ "unrelated_revenues",
+ "related_revenues",
+ ]
- df = df.loc[(df['mnc'] == company) & (df['year'] == year), features]
+ df = df.loc[(df["mnc"] == company) & (df["year"] == year), features]
# Drop rows where either unrelated or related revenues are missing
- df = df.dropna(subset=['unrelated_revenues', 'related_revenues'])
+ df = df.dropna(subset=["unrelated_revenues", "related_revenues"])
# 'total_revenues' is recreated using related and unrelated revenues since the one
# reported by companies is not always reliable
- df['total_revenues'] = df['unrelated_revenues'] + df['related_revenues']
+ df["total_revenues"] = df["unrelated_revenues"] + df["related_revenues"]
# Create a column to check if 'jur_code' is the domestic country
- df['domestic'] = df.apply(lambda row: row['jur_code'] == row['upe_code'], axis='columns')
+ df["domestic"] = df.apply(lambda row: row["jur_code"] == row["upe_code"], axis="columns")
# Compute kpis in a new DataFrame
data = pd.DataFrame()
- data['tax_haven'] = df.loc[df['jur_tax_haven'] == True, ['unrelated_revenues', 'related_revenues']].sum()
- data['non_tax_haven'] = df.loc[df['jur_tax_haven'] == False, ['unrelated_revenues', 'related_revenues']].sum()
- data['domestic'] = df.loc[df['domestic'] == True, ['unrelated_revenues', 'related_revenues']].sum()
+ data["tax_haven"] = df.loc[
+ df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"]
+ ].sum()
+ data["non_tax_haven"] = df.loc[
+ ~df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"]
+ ].sum()
+ data["domestic"] = df.loc[df["domestic"], ["unrelated_revenues", "related_revenues"]].sum()
# Replace values with share (%) of 'unrelated/related revenues'
- data = data.div(data.sum(axis='rows'), axis='columns')
+ data = data.div(data.sum(axis="rows"), axis="columns")
# Rename indexes
- data = data.rename(index={
- 'unrelated_revenues': 'unrelated_revenues_percentage',
- 'related_revenues': 'related_revenues_percentage'
- })
-
- # Convert DataFrame to dictionary
- data = data.to_dict()
-
- return data
-
-
-def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: str, year: int) -> tuple[pd.DataFrame, go.Figure]:
- """Display related and unrelated revenues in tax heaven, non tax heaven and
- domestic jurisdictions.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): Company name
- year (int): fiscal year to filter the results with.
- """
-
- # Compute data
- data = compute_related_and_unrelated_revenues_breakdown(df=df, company=company, year=year)
-
- # Create DataFrame
- df = pd.DataFrame.from_dict(data, orient='index')
-
- # Rename columns and indexes
- df.columns = df.columns.str.replace('_', ' ').str.capitalize()
- df.index = df.index.str.replace('_', ' ').str.capitalize()
+ data = data.rename(
+ index={
+ "unrelated_revenues": "Unrelated revenues",
+ "related_revenues": "Related revenues",
+ }
+ )
# Create figure
fig = px.bar(
df,
- x=['Unrelated revenues percentage', 'Related revenues percentage'],
+ x=["Unrelated revenues", "Related revenues"],
y=df.index,
- orientation='h',
- text_auto='.0%'
+ orientation="h",
+ text_auto=".0%",
)
+ # Update axis layout
+ fig.update_xaxes(title=None, tickformat=".0%")
+ fig.update_yaxes(title=None)
+
# Update layout settings
fig.update_layout(
- title='Breakdown of revenue',
- xaxis=dict(
- title=None,
- tickformat='.0%'
- ),
- yaxis_title=None,
- legend=dict(
- title=dict(text=''),
- orientation='h'
- ),
- plot_bgcolor='white',
- width=800,
- height=480
+ legend=dict(title=dict(text=""), orientation="h"), template=custom_template
)
# Define position of text values
- for col in ['Unrelated revenues percentage', 'Related revenues percentage']:
- values_positions = ['outside' if value <= 0.05 else 'inside' for value in df[col]]
+ for col in ["Unrelated revenues", "Related revenues"]:
+ values_positions = ["outside" if value <= 0.05 else "inside" for value in df[col]]
- fig.update_traces(
- textangle=0,
- textposition=values_positions,
- selector=dict(name=col)
- )
+ fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name=col))
# Add annotation if no values are availables (no bar displayed)
for i, index in enumerate(df.index):
@@ -999,17 +755,101 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company:
fig.add_annotation(
x=0.5,
y=df.index[i],
- text='No information to display',
+ text="No information to display",
showarrow=False,
- font=dict(size=13)
+ font=dict(size=13),
)
- # fig.show()
- return pd.DataFrame.from_dict(data, orient='index'), go.Figure(fig)
+ return go.Figure(fig)
-# Viz 21 - evolution of tax havens use over time : % profit vs % employees in TH over time
-def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict:
+# Viz 19 : what are the tax havens being used by the company
+def tax_haven_used_by_company(df_selected_company):
+ company_upe_code = df_selected_company["upe_code"].unique()[0]
+ pc_list = ["employees", "profit_before_tax", "related_revenues"]
+ # grouper = df_selected_company.groupby('jur_name')
+
+ df = pd.DataFrame(df_selected_company)
+
+ df_domestic_company = df[df["jur_code"] == company_upe_code]
+ df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"]
+ df_selected_company_nth = df[df["jur_tax_haven"] == "not.TH"]
+
+ for col in pc_list:
+ df.insert(
+ len(df_selected_company.columns),
+ col + "_domestic_sum",
+ df_domestic_company[col].sum(),
+ )
+
+ df.insert(
+ len(df_selected_company.columns), col + "_th_sum", df_selected_company_th[col].sum()
+ )
+
+ df.insert(len(df.columns), col + "_nth_sum", df_selected_company_nth[col].sum())
+
+ df.insert(len(df.columns), col + "_sum", df_selected_company[col].sum())
+
+ df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"])
+ # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum']
+
+ df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"]
+ df_selected_company_th_agg = df_selected_company_th.groupby(["mnc", "jur_name"]).agg(
+ profit_before_tax=("profit_before_tax", "sum"),
+ profit_before_tax_pc=("profit_before_tax_pc", "sum"),
+ employees_pc=("employees_pc", "sum"),
+ employees=("employees", "sum"),
+ related_revenues_pc=("related_revenues_pc", "sum"),
+ )
+ df_selected_company_th_agg = df_selected_company_th_agg.reset_index()
+ df_selected_company_th_agg["profit per employee"] = (
+ df_selected_company_th_agg["profit_before_tax"]
+ / df_selected_company_th_agg["employees"]
+ )
+ df_selected_company_th_agg["profit per employee"] = df_selected_company_th_agg[
+ "profit per employee"
+ ].replace([np.inf, -np.inf], None)
+
+ return df_selected_company, df_selected_company_th_agg
+
+
+# Viz 20 : complete table showing, for all jurisdictions, revenues, profits, employees and taxes with % of total for
+# each (color code for tax havens)
+def company_table(df_selected_company):
+ # company_upe_code = df_selected_company['upe_code'].unique()[0]
+ pc_list = [
+ "employees",
+ "profit_before_tax",
+ "unrelated_revenues",
+ "related_revenues",
+ "total_revenues",
+ "tax_paid",
+ ]
+
+ df = pd.DataFrame(df_selected_company)
+ for col in pc_list:
+ if col + "_sum" not in df.columns:
+ df.insert(len(df.columns), col + "_sum", df[col].sum())
+
+ df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"])
+ # f_selected_company[col + '_sum'] = df_selected_company[col].sum()
+ # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum']
+
+ # Complete table showing, for all jurisdictions, revenues, profits, employees and taxes with % of total for each (color code for tax havens)
+ df_selected_company_by_jur = df.groupby(["mnc", "jur_name"]).agg(
+ related_revenues_pc=("related_revenues_pc", "sum"),
+ unrelated_revenues=("unrelated_revenues", "sum"),
+ total_revenues=("total_revenues", "sum"),
+ profit_before_tax=("profit_before_tax", "sum"),
+ employees_pc=("employees_pc", "sum"),
+ tax_paid=("tax_paid", "sum"),
+ tax_paid_pc=("tax_paid_pc", "sum"),
+ )
+ return df_selected_company_by_jur.reset_index()
+
+
+# Viz 21 : evolution of tax havens use over time : % profit vs % employees in TH over time
+def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure:
"""Compute the evolution of tax havens use by company over time.
Args:
@@ -1017,121 +857,100 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict:
company (str): Company name
Returns:
- dict: tax havens percentage of profits and employees for each year.
+ go.Figure: tax havens use evolution in a Plotly Figure.
"""
# Filter rows with selected company and subset with necessary features
- features = ['jur_code', 'year', 'jur_tax_haven', 'profit_before_tax', 'employees']
- df = df.loc[(df['mnc'] == company), features]
+ features = ["jur_code", "year", "jur_tax_haven", "profit_before_tax", "employees"]
+ df = df.loc[(df["mnc"] == company), features]
# Keep jurisdictions with profitable or missing revenues
- df = df.loc[(df['profit_before_tax'] >= 0) | (df['profit_before_tax'].isna())]
+ df = df.loc[(df["profit_before_tax"] >= 0) | (df["profit_before_tax"].isna())]
# For all sum calculations below :
# - Result NA : all jurisdictions values were NA ;
# - Result 0 : at least one jurisdiction was reported as 0.
# Calculate total profit and employees by year and tax haven status
- df = df.groupby(['year', 'jur_tax_haven'], as_index=False)[['profit_before_tax', 'employees']].sum(min_count=1)
+ df = df.groupby(["year", "jur_tax_haven"], as_index=False)[
+ ["profit_before_tax", "employees"]
+ ].sum(min_count=1)
# Calculate total profits and employees for each year
- for year in df['year'].unique():
- df.loc[df['year'] == year, 'total_profit'] = df.loc[df['year'] == year, 'profit_before_tax'].sum(min_count=1)
- df.loc[df['year'] == year, 'total_employees'] = df.loc[df['year'] == year, 'employees'].sum(min_count=1)
+ for year in df["year"].unique():
+ df.loc[df["year"] == year, "total_profit"] = df.loc[
+ df["year"] == year, "profit_before_tax"
+ ].sum(min_count=1)
+ df.loc[df["year"] == year, "total_employees"] = df.loc[
+ df["year"] == year, "employees"
+ ].sum(min_count=1)
# Remove non tax haven jurisdictions
- df = df.loc[df['jur_tax_haven'] == True].reset_index()
+ df = df.loc[df["jur_tax_haven"] == True].reset_index()
# Calculate percentages
- df['tax_havens_profit_%'] = df['profit_before_tax'] / df['total_profit']
- df['tax_havens_employees_%'] = df['employees'] / df['total_employees']
-
- # Convert necessary data to dictionnary
- data = df[['year', 'tax_havens_profit_%', 'tax_havens_employees_%']].to_dict()
-
- return data
-
-
-def display_tax_havens_use_evolution(df: pd.DataFrame, company: str):
- """Display the evolution of tax havens use by company over time.
-
- Args:
- df (pd.DataFrame): CbCRs database.
- company (str): Company name
- """
-
- # Compute data
- data = compute_tax_havens_use_evolution(df=df, company=company)
-
- # Create DataFrame
- df = pd.DataFrame.from_dict(data)
+ df["tax_havens_profit_%"] = df["profit_before_tax"] / df["total_profit"]
+ df["tax_havens_employees_%"] = df["employees"] / df["total_employees"]
# Rename columns
- df = df.rename(columns={
- 'tax_havens_profit_%': 'Percentage of profits in tax havens',
- 'tax_havens_employees_%': 'Percentage of employees in tax havens'
- })
+ df = df.rename(
+ columns={
+ "tax_havens_profit_%": "Percentage of profits in tax havens",
+ "tax_havens_employees_%": "Percentage of employees in tax havens",
+ }
+ )
# Create figure
fig = px.bar(
df,
- x='year',
- y=['Percentage of profits in tax havens', 'Percentage of employees in tax havens'],
- barmode='group',
- text_auto='.1%'
+ x="year",
+ y=["Percentage of profits in tax havens", "Percentage of employees in tax havens"],
+ barmode="group",
+ text_auto=".1%",
)
+ # Update axis layout
+ fig.update_xaxes(title=None)
+ fig.update_yaxes(title=None, tickformat=".0%")
# Update layout settings
fig.update_layout(
- title='Tax havens use in profitables jurisdictions',
- xaxis_title=None,
- yaxis_title=None,
- yaxis_tickformat='.0%',
- legend=dict(
- title=dict(text=''),
- orientation='h'
- ),
- plot_bgcolor='white',
- width=800,
- height=480
+ legend=dict(title=dict(text=""), orientation="h"), template=custom_template
)
- # fig.show()
return go.Figure(fig)
-# Viz 24
-def compute_number_of_tracked_mnc_available(df) -> dict:
- # Drop duplicates to ensure each MNC appears only once per year
- df_unique_mnc = df.drop_duplicates(subset=['year', 'mnc'])
+# Viz 22 : locations of profits booked vs. mean 3Y ETR
+# TODO add code
- # Group the DataFrame by 'mnc' and count the number of reports for each MNC
- df_reports_per_mnc = df_unique_mnc.groupby('mnc').size().reset_index(name='report_count')
- # Convert the DataFrame to a dictionary where MNCs are keys and report counts are values
- mnc_report_count = dict(zip(df_reports_per_mnc['mnc'], df_reports_per_mnc['report_count'], strict=False))
+# Viz 24 : mnc tracked
+def mnc_tracked(df: pd.DataFrame) -> go.Figure:
+ """Compute and plot the list of company names in a word cloud where the size of the font depends on the number
+ of reports available.
- return mnc_report_count
+ Args:
+ df (pd.DataFrame): CbCRs database.
+ Returns:
+ go.Figure: word cloud with company names in a Plotly figure.
+ """
-def display_number_of_tracked_mnc_available(df) -> go.Figure:
- mnc_report_count = compute_number_of_tracked_mnc_available(df=df)
+ # Create dictionary with company name as key and the number of reports as value
+ data = df.groupby("mnc")["year"].nunique().to_dict()
color_func = get_single_color_func("#B8BEDB")
# Generate the word cloud using the report counts as weights
wordcloud = WordCloud(
- width=1200,
- height=800,
- background_color='white',
- color_func=color_func
- ).generate_from_frequencies(mnc_report_count)
+ width=1200, height=800, background_color="white", color_func=color_func
+ ).generate_from_frequencies(data)
# Display the word cloud
fig = px.imshow(wordcloud)
# Remove hover on image
- fig.update_traces(hoverinfo='skip', hovertemplate='')
+ fig.update_traces(hoverinfo="skip", hovertemplate="")
# Remove colorbar
fig.update_layout(coloraxis_showscale=False)
@@ -1146,20 +965,20 @@ def display_number_of_tracked_mnc_available(df) -> go.Figure:
return go.Figure(fig)
-# Viz 25
+# Viz 25 : company’s average transparency score
# List financial columns
financial_columns = [
- 'total_revenues',
- 'profit_before_tax',
- 'tax_paid',
- 'tax_accrued',
- 'unrelated_revenues',
- 'related_revenues',
- 'stated_capital',
- 'accumulated_earnings',
- 'tangible_assets',
- 'employees'
+ "total_revenues",
+ "profit_before_tax",
+ "tax_paid",
+ "tax_accrued",
+ "unrelated_revenues",
+ "related_revenues",
+ "stated_capital",
+ "accumulated_earnings",
+ "tangible_assets",
+ "employees",
]
@@ -1177,12 +996,12 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float
# Filter rows with selected company and subset with financial columns
df = df.loc[
- (df['mnc'] == company) & (df['year'] == year),
- ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns]
+ (df["mnc"] == company) & (df["year"] == year),
+ ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns],
]
# Remove columns where data are missing for all jurisdictions
- df = df.dropna(axis='columns', how='all')
+ df = df.dropna(axis="columns", how="all")
# List financial columns left after deleting columns with only missing values
financial_columns_left = [col for col in df.columns if col in financial_columns]
@@ -1197,8 +1016,8 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float
# Calculate percentage of each financial value where jurisdiction is 'OTHER'
# Percentage = 1. Total of 'OTHER' row(s) / 2. Total of all rows
other_percentage = (
- df.loc[df['jur_code'] == 'OTHER', financial_columns_left].sum() # 1
- / df[financial_columns_left].sum() # 2
+ df.loc[df["jur_code"] == "OTHER", financial_columns_left].sum() # 1
+ / df[financial_columns_left].sum() # 2
)
# Calculate geographic score
@@ -1222,12 +1041,12 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo
# Filter rows with selected company and subset with financial columns
df = df.loc[
- (df['mnc'] == company) & (df['year'] == year),
- ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns]
+ (df["mnc"] == company) & (df["year"] == year),
+ ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns],
]
# Remove columns where data are missing for all jurisdictions
- df = df.dropna(axis='columns', how='all')
+ df = df.dropna(axis="columns", how="all")
# List financial columns left after deleting columns with only missing values
financial_columns_left = [col for col in df.columns if col in financial_columns]
@@ -1243,7 +1062,7 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo
score = len(financial_columns_left)
- for variable in ['profit_before_tax', 'tax_paid']:
+ for variable in ["profit_before_tax", "tax_paid"]:
if variable in df.columns:
score += 1
@@ -1267,12 +1086,12 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo
# Filter rows with selected company and subset with financial columns
df = df.loc[
- (df['mnc'] == company) & (df['year'] == year),
- ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns]
+ (df["mnc"] == company) & (df["year"] == year),
+ ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns],
]
# Remove columns where data are missing for all jurisdictions
- df = df.dropna(axis='columns', how='all')
+ df = df.dropna(axis="columns", how="all")
# List financial columns left after deleting columns with only missing values
financial_columns_left = [col for col in df.columns if col in financial_columns]
@@ -1287,8 +1106,8 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo
# Calculate percentage of each financial value where jurisdiction is not 'OTHER'
# Percentage = 1. Total of not 'OTHER' row(s) / 2. Total of all rows
not_other_percentage = (
- df.loc[df['jur_code'] != 'OTHER', financial_columns_left].sum() # 1
- / df[financial_columns_left].sum() # 2
+ df.loc[df["jur_code"] != "OTHER", financial_columns_left].sum() # 1
+ / df[financial_columns_left].sum() # 2
)
# Calculate transparency score
@@ -1310,7 +1129,7 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict:
"""
# List all years when the company as reported
- years_list = sorted(df.loc[df['mnc'] == company, 'year'].unique())
+ years_list = sorted(df.loc[df["mnc"] == company, "year"].unique())
# Initialize an empty dictionary
data = dict()
@@ -1323,17 +1142,16 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict:
transparency_score = compute_transparency_score(df=df, company=company, year=year)
data[year] = {
- 'mnc': company,
- 'geographic_score': geographic_score,
- 'completeness_score': completeness_score,
- 'transparency_score': transparency_score
+ "mnc": company,
+ "geographic_score": geographic_score,
+ "completeness_score": completeness_score,
+ "transparency_score": transparency_score,
}
return data
-def transparency_scores_to_csv(
- df: pd.DataFrame, csv_path: str = './') -> pd.DataFrame:
+def transparency_scores_to_csv(df: pd.DataFrame, csv_path: str = "./") -> pd.DataFrame:
"""Compute transparency score for all companies and all years into a
DataFrame and export it to a csv file (optional).
@@ -1346,30 +1164,29 @@ def transparency_scores_to_csv(
"""
# List all companies
- mnc_list = df['mnc'].unique()
+ mnc_list = df["mnc"].unique()
# Initialize an empty DataFrame
mnc_df = pd.DataFrame()
# Calculate transparency scores for all companies and add them to the DataFrame
for mnc in mnc_list:
- temp_df = pd.DataFrame.from_dict(
- compute_all_scores(df=df, company=mnc), orient='index')
+ temp_df = pd.DataFrame.from_dict(compute_all_scores(df=df, company=mnc), orient="index")
mnc_df = pd.concat([mnc_df, temp_df])
# Reset index and move 'mnc' columns in first position
- mnc_df = mnc_df.reset_index().rename(columns={'index': 'year'})
- mnc_df.insert(0, 'mnc', mnc_df.pop('mnc'))
+ mnc_df = mnc_df.reset_index().rename(columns={"index": "year"})
+ mnc_df.insert(0, "mnc", mnc_df.pop("mnc"))
if csv_path:
- mnc_df.to_csv(csv_path + 'transparency_scores.csv', index=False)
+ mnc_df.to_csv(csv_path + "transparency_scores.csv", index=False)
return mnc_df
-def display_transparency_score(df: pd.DataFrame, company: str, year: int = None):
- """Display transparency score for specific company in a metric.
+def transparency_score(df: pd.DataFrame, company: str, year: int = None):
+ """Compute the transparency score of a specific company (averaged over all years when no year is given).
Args:
df (pd.DataFrame): CbCRs database.
@@ -1381,51 +1198,25 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None)
data = compute_all_scores(df=df, company=company)
# Create DataFrame
- df = pd.DataFrame.from_dict(data, orient='index')
+ df = pd.DataFrame.from_dict(data, orient="index")
# Reset index and move 'mnc' columns in first position
- df = df.reset_index().rename(columns={'index': 'year'})
+ df = df.reset_index().rename(columns={"index": "year"})
# When data are not filtered by year, the score is the average of all years
score = round(
- df.loc[df['year'] == year, 'transparency_score'].iloc[0] if year
- else df['transparency_score'].mean(),
- 0
- )
-
- # Create figure
- fig = go.Figure()
-
- # Add circular background
- fig.add_shape(
- type='circle',
- x0=0, y0=0, x1=1, y1=1,
- line_color='blue',
- fillcolor='blue',
- opacity=0.3
+ df.loc[df["year"] == year, "transparency_score"].iloc[0]
+ if year
+ else df["transparency_score"].mean(),
+ 0,
)
- # Add indicator
- fig.add_trace(go.Indicator(
- mode='number',
- value=score,
- number={'suffix': '%', 'valueformat': '.0f', 'font': {'size': 54}},
- domain={'x': [0, 1], 'y': [0, 1]}
- ))
-
- # Update layout
- fig.update_layout(
- width=360,
- height=360)
-
return score
-# Viz 26
-
+# Viz 26 : company’s transparency score over time + details for each component of the score
# Functions below use the same computation function (compute_all_scores) as used for Viz 25.
-
-def display_transparency_score_over_time(df: pd.DataFrame, company: str):
+def transparency_score_over_time(df: pd.DataFrame, company: str):
"""Display transparency scores over time for a specific company in a bar
chart.
@@ -1438,82 +1229,76 @@ def display_transparency_score_over_time(df: pd.DataFrame, company: str):
data = compute_all_scores(df=df, company=company)
# Create DataFrame
- df = pd.DataFrame.from_dict(data, orient='index')
+ df = pd.DataFrame.from_dict(data, orient="index")
# Reset index and rename the resulting 'index' column to 'year'
- df = df.reset_index().rename(columns={'index': 'year'})
+ df = df.reset_index().rename(columns={"index": "year"})
# Create figure
- fig = px.bar(df, x='year', y='transparency_score',
- text_auto='.0f'
- )
+ fig = px.bar(df, x="year", y="transparency_score", text_auto=".0f")
# Update layout settings
fig.update_layout(
- title='Transparency score over time',
- xaxis=dict(
- title=None,
- tickvals=df['year'].unique()
- ),
+ template=custom_template,
+ xaxis=dict(title=None, tickvals=df["year"].unique()),
yaxis=dict(
title=None,
showline=True,
- ticks='outside',
- linecolor='grey',
- tickcolor='grey',
+ ticks="outside",
+ linecolor="grey",
+ tickcolor="grey",
range=[0, 101],
tickvals=[0, 25, 50, 75, 100],
- ticktext=[0, '', '', '', 100]
+ ticktext=[0, "", "", "", 100],
),
- plot_bgcolor='white',
- width=800,
- height=480
)
# Force position and color of bar values
- fig.update_traces(
- textposition='outside', textfont=dict(color=fig.data[0].marker.color)
- )
+ fig.update_traces(textposition="outside", textfont=dict(color=fig.data[0].marker.color))
fig.show()
-def display_transparency_score_over_time_details(
- df: pd.DataFrame, company: str) -> pd.DataFrame:
- """Display details of components of transparency scores over time
- for a specific company in a table.
+def transparency_scores_over_time_details(df: pd.DataFrame, company: str) -> pd.DataFrame:
+ """Compute all geographic, completeness and general transparency scores over time for a specific company in a table.
Args:
df (pd.DataFrame): CbCRs database.
company (str): Company name.
Returns:
- pd.DataFrame: Table with details of components over years.
+ pd.DataFrame: Table with details of scores over years.
"""
# Compute data
data = compute_all_scores(df=df, company=company)
# Create DataFrame
- df = pd.DataFrame.from_dict(data, orient='index')
+ df = pd.DataFrame.from_dict(data, orient="index")
# Drop 'mnc' column
- df = df.drop(columns='mnc')
+ df = df.drop(columns="mnc")
# Round and convert percentage to string with '/100' annotation
- df = df.apply(lambda x: round(x).astype(int).astype('string') + '/100')
+ df = df.apply(lambda x: round(x).astype(int).astype("string") + "/100")
# Reset index and rename the resulting 'index' column to 'Fiscal year'
- df = df.reset_index().rename(columns={'index': 'Fiscal year'})
+ df = df.reset_index().rename(columns={"index": "Fiscal year"})
# Move 'transparency_score' before other score columns
- df.insert(1, 'transparency_score', df.pop('transparency_score'))
+ df.insert(1, "transparency_score", df.pop("transparency_score"))
# Rename columns
- df = df.rename(columns={
- 'geographic_score': 'Score on geographical disaggretion',
- 'completeness_score': 'Score on variable exhaustiveness',
- 'transparency_score': 'Transparency score',
- })
+ df = df.rename(
+ columns={
+ "geographic_score": "Score on geographical disaggretion",
+ "completeness_score": "Score on variable exhaustiveness",
+ "transparency_score": "Transparency score",
+ }
+ )
return df
+
+
+# Viz 27 : average transparency score over time
+# TODO add code
diff --git a/app/pages/company/company.md b/app/pages/company/company.md
index bc34ce9..a6823f7 100644
--- a/app/pages/company/company.md
+++ b/app/pages/company/company.md
@@ -129,15 +129,15 @@ Financial profile
<|part|class_name=viz-container|
<|{viz["fin_key_financials_kpis"].title}|text|class_name=text-weight400|>
-<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300 text-transparent|>
+<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300|>
<|{viz["fin_key_financials_kpis"].data}|table|show_all|sortable=False|dynamic=True|style=table-cell|class_name=rows-similar table-top|>
|>
<|part|class_name=viz-container|
-<|{viz["fin_jurisdictions_top_revenue"].title}|text|class_name=text-weight400|>
+<|{viz["fin_top_jurisdictions_revenue"].title}|text|class_name=text-weight400|>
-<|{viz["fin_jurisdictions_top_revenue"].sub_title}|text|class_name=text-small text-weight300|>
-<|chart|figure={viz["fin_jurisdictions_top_revenue"].fig}|>
+<|{viz["fin_top_jurisdictions_revenue"].sub_title}|text|class_name=text-small text-weight300|>
+<|chart|figure={viz["fin_top_jurisdictions_revenue"].fig}|>
|>
|>
diff --git a/app/pages/company/company.py b/app/pages/company/company.py
index 6871428..0ce5b11 100644
--- a/app/pages/company/company.py
+++ b/app/pages/company/company.py
@@ -35,7 +35,7 @@
"fin_transparency_score",
"fin_transparency_score_over_time_details",
"fin_key_financials_kpis",
- "fin_jurisdictions_top_revenue",
+ "fin_top_jurisdictions_revenue",
"fin_pretax_profit_and_employees_rank",
"fin_pretax_profit_and_profit_per_employee",
}
@@ -92,8 +92,8 @@ def update_state(state: State):
# print(f'company state selected_year:{state.selected_year}')
# Calculate number of reports for the selected company
- state.df_count_company = algo.number_of_tracked_reports_over_time_company(
- state.df_selected_company
+ state.df_count_company = algo.number_of_tracked_reports(
+ state.df_selected_company, "mnc", state.selected_company
)
# print(f'company state df_count_company:{state.df_count_company.head()}')
@@ -128,7 +128,9 @@ def update_viz_company(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.number_of_tracked_reports_company(state.df_selected_company),
+ data=algo.number_of_tracked_reports(
+ state.df_selected_company, "mnc", state.selected_company
+ ),
title="Number of reports",
).to_state()
# print(f'update viz id:{id} title:{state.viz[id].title}')
@@ -137,7 +139,7 @@ def update_viz_company(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.display_transparency_score(state.data, state.selected_company),
+ data=algo.transparency_score(state.data, state.selected_company),
title="Transparency Score",
sub_title="average over all reports",
).to_state()
@@ -163,7 +165,7 @@ def update_viz_year(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.display_transparency_score(
+ data=algo.transparency_score(
state.data, state.selected_company, int(state.selected_year)
),
title="Transparency Score",
@@ -175,7 +177,7 @@ def update_viz_year(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.display_transparency_score_over_time_details(
+ data=algo.transparency_scores_over_time_details(
state.data, state.selected_company
),
title="Transparency score over time ",
@@ -187,7 +189,7 @@ def update_viz_year(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.display_company_key_financials_kpis(
+ data=algo.company_key_financials_kpis(
state.data, state.selected_company, int(state.selected_year)
),
title="Key metrics",
@@ -195,11 +197,11 @@ def update_viz_year(state: State):
).to_state()
# print(f'update viz id:{id} title:{state.viz[id].title}')
- id = "fin_jurisdictions_top_revenue"
+ id = "fin_top_jurisdictions_revenue"
state.viz[id] = Viz(
id=id,
state=state,
- fig=algo.display_jurisdictions_top_revenue(
+ fig=algo.top_jurisdictions_revenue(
state.data, state.selected_company, int(state.selected_year)
),
title="Distribution of revenues across countries",
@@ -212,7 +214,7 @@ def update_viz_year(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- fig=algo.display_pretax_profit_and_employees_rank(
+ fig=algo.pretax_profit_and_employees_rank(
state.data, state.selected_company, int(state.selected_year)
),
title="% profit and employees by country",
@@ -224,7 +226,7 @@ def update_viz_year(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- fig=algo.display_pretax_profit_and_profit_per_employee(
+ fig=algo.pretax_profit_and_profit_per_employee(
state.data, state.selected_company, int(state.selected_year)
),
title="% profit and profit / employee by country",
diff --git a/app/pages/home/home.md b/app/pages/home/home.md
index 0177c81..8ac7581 100644
--- a/app/pages/home/home.md
+++ b/app/pages/home/home.md
@@ -108,10 +108,10 @@ Our database is growing
|>
<|part|class_name=viz-container|
-<|{viz["general_number_of_tracked_mnc_available"].title}|text|class_name=text-weight400|>
+<|{viz["general_list_of_tracked_mnc_available"].title}|text|class_name=text-weight400|>
-<|{viz["general_number_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|>
-<|chart|figure={viz["general_number_of_tracked_mnc_available"].fig}|>
+<|{viz["general_list_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|>
+<|chart|figure={viz["general_list_of_tracked_mnc_available"].fig}|>
|>
|>
diff --git a/app/pages/home/home.py b/app/pages/home/home.py
index 3188cea..baeac96 100644
--- a/app/pages/home/home.py
+++ b/app/pages/home/home.py
@@ -29,7 +29,7 @@ def on_init(state: State):
"general_number_of_tracked_reports",
"general_number_of_tracked_reports_over_time",
"general_number_of_tracked_mnc",
- "general_number_of_tracked_mnc_available",
+ "general_list_of_tracked_mnc_available",
)
)
@@ -50,8 +50,7 @@ def update_viz(state: State):
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.number_of_tracked_reports_over_time(state.data),
- fig=algo.display_number_of_tracked_reports_over_time(state.data),
+ fig=algo.number_of_tracked_reports_over_time(state.data),
title="Number of reports over time",
).to_state()
@@ -64,12 +63,11 @@ def update_viz(state: State):
sub_title="with 1+ report tracked",
).to_state()
- id = "general_number_of_tracked_mnc_available"
+ id = "general_list_of_tracked_mnc_available"
state.viz[id] = Viz(
id=id,
state=state,
- data=algo.compute_number_of_tracked_mnc_available(state.data),
- fig=algo.display_number_of_tracked_mnc_available(state.data),
+ fig=algo.mnc_tracked(state.data),
title="Multinationals available",
sub_title="with 1+ report tracked",
).to_state()