diff --git a/app/algo.py b/app/algo.py index 6f9354c..6e4df32 100644 --- a/app/algo.py +++ b/app/algo.py @@ -1,3 +1,8 @@ +""" +This module contains functions to compute and/or display the visualizations, defined by EU Tax Observatory, which +are needed in Taxplorer tool. Below functions will be used in different pages of the website. +""" + import pandas as pd import numpy as np import plotly.express as px @@ -5,89 +10,105 @@ import humanize from wordcloud import WordCloud, get_single_color_func + +# Define custom template +custom_template = { + "layout": { + "autosize": True, + "plot_bgcolor": "white", + "font": {"family": "Roboto, sans-serif"}, + "title": None, + "margin": dict(l=0, r=0, b=0, t=0), + } +} + + # Define color sequence for plots COLOR_SEQUENCE = ["#D9D9D9", "#1E2E5C"] -# TODO add viz comment -# Viz 1 - -def number_of_tracked_reports(df): - number_of_tracked_reports = len(df.groupby(["year", "mnc"])["mnc"]) - return number_of_tracked_reports +# Viz 1 : Number of tracked reports +def number_of_tracked_reports( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> int: + """Calculate the number of tracked reports with possibility to filter on company name, sector or headquarter + location. -# TODO add viz comment -def number_of_tracked_reports_company(df_selected_company): - number_of_tracked_reports_company = len( - df_selected_company.groupby(["year"])["year"] - ) - return number_of_tracked_reports_company + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. + + Returns: + int: number of tracked reports. + """ + # Initialise available filters + filter_values = [None, "mnc", "sector", "upe_name"] -def number_of_tracked_mnc(df: pd.DataFrame) -> int: - return df["mnc"].nunique() + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") + # Compute number of reports + if filter_name: + n_reports = ( + df.loc[df[filter_name] == filter_value].groupby("mnc")["year"].nunique().sum() + ) + else: + n_reports = df.groupby("mnc")["year"].nunique().sum() -# TODO add viz comment -def number_of_tracked_reports_sector(df_selected_sector): - number_of_tracked_reports_sector = len( - df_selected_sector.groupby(["year", "mnc"])["year"] - ) - return number_of_tracked_reports_sector + return int(n_reports) -# TODO add viz comment -def number_of_tracked_reports_country(df_selected_country): - number_of_tracked_reports_country = len( - df_selected_country.groupby(["year", "mnc"])["year"] - ) - return number_of_tracked_reports_country +# Viz 2 : Number of tracked reports over time +def number_of_tracked_reports_over_time( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> go.Figure: + """Compute and plot the number of tracked reports over time with possibility to filter on company name, sector or + headquarter location. + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. -# TODO add viz comment -# Viz 2 - Number of tracked reports over time -def number_of_tracked_reports_over_time(df): - df_count = df.groupby(["year"])["mnc"].nunique().reset_index() - return df_count + Returns: + go.Figure: number of tracked reports over time in a Plotly figure. + """ + # Initialise available filters + filter_values = [None, "mnc", "sector", "upe_name"] -def display_number_of_tracked_reports_over_time(df): - # Calculate number of companies per year - data = number_of_tracked_reports_over_time(df=df) + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") - # Bar color sequence - bar_color = '#D9D9D9' + # Compute number of reports + if filter_name: + data = ( + df.loc[df[filter_name] == filter_value] + .groupby("year")["mnc"] + .nunique() + .reset_index() + ) + else: + data = df.groupby("year")["mnc"].nunique().reset_index() # Create figure fig = px.bar( - data, - x='year', - y='mnc', - color_discrete_sequence=[bar_color], - text_auto=True + data, x="year", y="mnc", text_auto=True, color_discrete_sequence=COLOR_SEQUENCE ) # Force position and color of bar values - fig.update_traces( - textposition='outside', textfont=dict(color='black') - ) + fig.update_traces(textposition="outside", textfont=dict(color="black")) + + # Define axes settings + fig.update_xaxes(title=None, tickvals=data["year"].unique()) + fig.update_yaxes(title=None, visible=False, range=[0, data["mnc"].max() * 1.1]) # Update layout settings - fig.update_layout( - autosize=True, - height=360, - font_family='Roboto', - title=None, - xaxis=dict( - title=None, - tickvals=data['year'].unique() - ), - yaxis=dict( - title=None, - visible=False, - ), - plot_bgcolor='white', - margin=dict(l=0, r=0, b=0, t=0) - ) + fig.update_layout(template=custom_template, height=360) # Define style of hover on bars fig.update_traces( @@ -97,328 +118,214 @@ def display_number_of_tracked_reports_over_time(df): return go.Figure(fig) -# TODO add viz comment -def number_of_tracked_reports_over_time_company(df_selected_company): - df_count_company = ( - df_selected_company.groupby(["year"])["mnc"].nunique().reset_index() - ) - # df_count_all_company = df.groupby(["year"])["mnc"].nunique().reset_index() - - # row[3].line_chart(df_count_all_company, x="year", y="mnc") - - # row[4].write("selected sector") - # row[4].write( - # "df_selected_sector.groupby(['year'])['mnc'].nunique().reset_index()" - # ) - return df_count_company - - -# TODO add viz comment -def number_of_tracked_reports_over_time_sector(df_selected_sector): - df_count_sector = ( - df_selected_sector.groupby(["year"])["mnc"].nunique().reset_index() - ) - - # df_count_all_sector = ( - # df.groupby(["year", "sector"])["mnc"].nunique().reset_index() - # ) - - # row[4].line_chart(df_count_all_sector, x="year", y="mnc", color="sector") - - # row[5].write("selected country") - # row[5].write( - # "df_selected_country.groupby(['year'])['mnc'].nunique().reset_index()" - # ) - return df_count_sector +# Viz 3 : Number of tracked mnc +def number_of_tracked_mnc( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> int: + """Calculate the number of tracked reports with possibility to filter on company name, sector or headquarter + location. + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. -# TODO add viz comment -def number_of_tracked_reports_over_time_country(df_selected_country): - df_count_country = ( - df_selected_country.groupby(["year"])["mnc"].nunique().reset_index() - ) - # df_count_all_country = ( - # df.groupby(["year", "jur_name"])["mnc"].nunique().reset_index() - # ) - - # row[5].line_chart(df_count_all_country, x="year", y="mnc", color="jur_name") - return df_count_country - - -# Viz 16 - -# company’s % pre-tax profit and profit per employee -# plot chart : x-axis = % profit, y axis = profit / employee -# size of the bubble based on % profit and a color code for -# tax havens vs others -def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_company): - # pretax_profit_col_name = 'profit_before_tax' - profit_col_name = '' - employee_col_name = 'employees' - df_selected_company[profit_col_name] / df_selected_company[employee_col_name] - - -# Viz 19 -# what are the tax havens being used by the company -# to test but could be a table with one row per jurisdiction (filtering on TH) with -# % profit -# % employee -# profit per employee -# % related party revenue -# for domestic vs tax havens vs. non havens -def tax_haven_used_by_company(df_selected_company): - company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = ['employees', 'profit_before_tax', 'related_revenues'] - # grouper = df_selected_company.groupby('jur_name') - - df = pd.DataFrame(df_selected_company) - - df_domestic_company = df[df['jur_code'] == company_upe_code] - df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH'] - df_selected_company_nth = df[df['jur_tax_haven'] == 'not.TH'] - - for col in pc_list: - - df.insert( - len(df_selected_company.columns), - col + '_domestic_sum', - df_domestic_company[col].sum()) - - df.insert( - len(df_selected_company.columns), - col + '_th_sum', - df_selected_company_th[col].sum()) - - df.insert( - len(df.columns), - col + '_nth_sum', - df_selected_company_nth[col].sum()) - - df.insert( - len(df.columns), - col + '_sum', - df_selected_company[col].sum()) + Returns: + int: number of companies in the database. + """ - df.insert( - len(df.columns), - col + '_pc', - 100 * df[col] / df[col + '_sum']) - # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum'] + # Initialise available filters + filter_values = [None, "sector", "upe_name"] - df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH'] - df_selected_company_th_agg = df_selected_company_th.groupby(['mnc', 'jur_name']).agg( - profit_before_tax=('profit_before_tax', 'sum'), - profit_before_tax_pc=('profit_before_tax_pc', 'sum'), - employees_pc=('employees_pc', 'sum'), - employees=('employees', 'sum'), - related_revenues_pc=('related_revenues_pc', 'sum') - ) - df_selected_company_th_agg = df_selected_company_th_agg.reset_index() - df_selected_company_th_agg['profit per employee'] = \ - df_selected_company_th_agg['profit_before_tax'] / df_selected_company_th_agg['employees'] - df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace( - [np.inf, -np.inf], None) + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") - return df_selected_company, df_selected_company_th_agg + # Compute number of reports + if filter_name: + n_company = df.loc[df[filter_name] == filter_value, "mnc"].nunique() + else: + n_company = df["mnc"].nunique() + return int(n_company) -# TODO add viz comment -# complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) -def company_table(df_selected_company): - # company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', 'related_revenues', 'total_revenues', 'tax_paid'] - - df = pd.DataFrame(df_selected_company) - for col in pc_list: - if col + '_sum' not in df.columns: - df.insert( - len(df.columns), - col + '_sum', - df[col].sum()) - - df.insert( - len(df.columns), - col + '_pc', - 100 * df[col] / df[col + '_sum']) - # f_selected_company[col + '_sum'] = df_selected_company[col].sum() - # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum'] - # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) - df_selected_company_by_jur = df.groupby(['mnc', 'jur_name']).agg( - related_revenues_pc=('related_revenues_pc', 'sum'), - unrelated_revenues=('unrelated_revenues', 'sum'), - total_revenues=('total_revenues', 'sum'), - profit_before_tax=('profit_before_tax', 'sum'), - employees_pc=('employees_pc', 'sum'), - tax_paid=('tax_paid', 'sum'), - tax_paid_pc=('tax_paid_pc', 'sum'), +# Viz 4 : Breakdown of reports by sector +def breakdown_of_reports_by_sector(df: pd.DataFrame) -> go.Figure: + # Dataframe called df + df_reports_per_sector_year = ( + df.groupby(["sector", "year"])["mnc"].nunique().reset_index(name="unique_company_count") ) - return df_selected_company_by_jur.reset_index() - - -# Viz 4 - Breakdown of reports by sector (pie chart) -def breakdown_of_reports_by_sector(df): - #Dataframe called df - df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index( - name='unique_company_count') # Aggregate the counts of unique companies across all years for each sector - df_reports_per_sector = df_reports_per_sector_year.groupby('sector')['unique_company_count'].sum().reset_index() + df_reports_per_sector = ( + df_reports_per_sector_year.groupby("sector")["unique_company_count"].sum().reset_index() + ) # Calculate the total count of unique companies across all sectors - total_companies = df_reports_per_sector['unique_company_count'].sum() + total_companies = df_reports_per_sector["unique_company_count"].sum() # Calculate the percentage of each sector's count relative to the total count and round to 2 decimals - df_reports_per_sector['percent'] = ((df_reports_per_sector['unique_company_count'] / total_companies) * 100).round( - 2) + df_reports_per_sector["percent"] = ( + (df_reports_per_sector["unique_company_count"] / total_companies) * 100 + ).round(2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_sector = df_reports_per_sector.sort_values(by='unique_company_count', ascending=True) + df_reports_per_sector = df_reports_per_sector.sort_values( + by="unique_company_count", ascending=True + ) - return df_reports_per_sector + # Plotting the horizontal bar chart with Plotly Express + fig = px.bar( + df_reports_per_sector, + y="sector", + x="percent", + orientation="h", # Horizontal orientation + labels={"percent": "Percentage of Companies (%)", "sector": "Sector"}, + text="percent", # Show the percentage as text label + hover_data={ + "unique_company_count": True, + "percent": ":.2f%", + }, # Add tooltip for count and rounded percentage + ) + # Update layout settings + fig.update_layout(template=custom_template) -def breakdown_of_reports_by_sector_viz(df_reports_per_sector): - # Plotting the horizontal bar chart with Plotly Express - fig = px.bar(df_reports_per_sector, y='sector', x='percent', - orientation='h', # Horizontal orientation - title='Breakdown of Reports by Sector (All Years)', - labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'}, - text='percent', # Show the percentage as text label - hover_data={'unique_company_count': True, 'percent': ':.2f%'}, - # Add tooltip for count and rounded percentage - ) - - # Update layout to display the title above the chart - fig.update_layout(title='Breakdown of Reports by Sector', - title_x=0.5, title_y=0.9, # Adjust position - title_font_size=20) # Adjust font size - - # Show the horizontal bar chart return go.Figure(fig) -# Viz 5 - Breakdown of reports by HQ country (pie chart) -def breakdown_of_reports_by_hq_country(df): - # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ country and year - df_reports_per_country_year = df.groupby(['upe_name', 'year'])['mnc'].nunique().reset_index( - name='unique_company_count') +# Viz 5 : Breakdown of reports by hq country +def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure: + # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ + # country and year + df_reports_per_country_year = ( + df.groupby(["upe_name", "year"])["mnc"] + .nunique() + .reset_index(name="unique_company_count") + ) # Aggregate the counts of unique companies across all years for each HQ country - df_reports_per_country = df_reports_per_country_year.groupby('upe_name')['unique_company_count'].sum().reset_index() + df_reports_per_country = ( + df_reports_per_country_year.groupby("upe_name")["unique_company_count"] + .sum() + .reset_index() + ) # Calculate the total count of unique companies across all HQ countries - total_companies = df_reports_per_country['unique_company_count'].sum() + total_companies = df_reports_per_country["unique_company_count"].sum() # Calculate the percentage of each HQ country's count relative to the total count and round to 2 decimals - df_reports_per_country['percent'] = ( - (df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2) + df_reports_per_country["percent"] = ( + (df_reports_per_country["unique_company_count"] / total_companies) * 100 + ).round(2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_country = df_reports_per_country.sort_values(by='unique_company_count', ascending=True) - - return df_reports_per_country - + df_reports_per_country = df_reports_per_country.sort_values( + by="unique_company_count", ascending=True + ) -def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): # Plotting the horizontal bar chart with Plotly Express - fig = px.bar(df_reports_per_country, y='upe_name', x='percent', - orientation='h', # Horizontal orientation - title='Breakdown of Reports by HQ Country over Time', - labels={'percent': 'Percentage of Companies (%)', 'upe_name': 'HQ Country'}, - text='percent', # Show the percentage as text label - hover_data={'unique_company_count': True, 'percent': ':.2f%'}, - # Add tooltip for count and rounded percentage - ) - - # Update layout to display the title above the chart - fig.update_layout(title='Breakdown of Reports by HQ Country over Time', - title_x=0.5, title_y=0.95, # Adjust position - title_font_size=20) # Adjust font size - - # Show the horizontal bar chart - # fig.show() - return go.Figure(fig) - + fig = px.bar( + df_reports_per_country, + y="upe_name", + x="percent", + orientation="h", # Horizontal orientation + labels={"percent": "Percentage of Companies (%)", "upe_name": "HQ Country"}, + text="percent", # Show the percentage as text label + hover_data={"unique_company_count": True, "percent": ":.2f%"}, + # Add tooltip for count and rounded percentage + ) -## Viz 6 - Breakdown of reports by sector over time (bar chart) + # Update layout settings + fig.update_layout(template=custom_template) + return go.Figure(fig) -def breakdown_of_reports_by_sector_over_time(df): - # df_reports_per_sector_over_time = df - # return df_reports_per_sector_over_time +# Viz 6 : Breakdown of reports by sector over time +def breakdown_of_reports_by_sector_over_time(df: pd.DataFrame) -> go.Figure: # Step 1: Determine the top 10 sectors that released reports - top_10_sectors = df['sector'].value_counts().nlargest(10).index.tolist() + top_10_sectors = df["sector"].value_counts().nlargest(10).index.tolist() # Step 2: Group all other sectors as "Others" - df['Sectors'] = df['sector'].apply(lambda x: x if x in top_10_sectors else 'Others') + df["Sectors"] = df["sector"].apply(lambda x: x if x in top_10_sectors else "Others") # Step 3: Group the DataFrame by 'year', 'Sectors', and count the number of unique companies for each year and sector - df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index( - name='unique_company_count') + df_reports_per_year_sector = ( + df.groupby(["year", "Sectors"])["mnc"] + .nunique() + .reset_index(name="unique_company_count") + ) # Sort sectors alphabetically - df_reports_per_year_sector = df_reports_per_year_sector.sort_values(by='Sectors', ascending=False) - - return df_reports_per_year_sector, top_10_sectors - + df_reports_per_year_sector = df_reports_per_year_sector.sort_values( + by="Sectors", ascending=False + ) -def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors): # Define the order of sectors for the stacked bar chart and legend, reversed - chart_order = ['Others'] + top_10_sectors[::-1] - legend_order = ['Others'] + top_10_sectors[::-1] + chart_order = ["Others"] + top_10_sectors[::-1] + legend_order = ["Others"] + top_10_sectors[::-1] # Plotting the bar chart using Plotly Express - fig = px.bar(df_reports_per_year_sector, x='year', y='unique_company_count', color='Sectors', - title='Breakdown of Reports by Sector over Time', - labels={'unique_company_count': 'Number of Companies Reporting', 'year': 'Year'}, - barmode='stack', - category_orders={'Sectors': chart_order}) + fig = px.bar( + df_reports_per_year_sector, + x="year", + y="unique_company_count", + color="Sectors", + labels={"unique_company_count": "Number of Companies Reporting", "year": "Year"}, + barmode="stack", + category_orders={"Sectors": chart_order}, + ) - # Reverse the order of legend items - fig.update_layout(legend=dict(traceorder='reversed')) + # Update layout settings + fig.update_layout(template=custom_template, legend=dict(traceorder="reversed")) # Adjusting the legend order and formatting the legend labels for i, trace in enumerate(fig.data): trace.name = legend_order[i] # Change color of the "Others" bar to grey - if trace.name == 'Others': - trace.marker.color = 'grey' + if trace.name == "Others": + trace.marker.color = "grey" - # Show the plot - # fig.show() return go.Figure(fig) -## Viz 7 - Breakdown of reports by HQ country over time (bar chart) +# Viz 7 : Breakdown of reports by hq country over time # TODO add code -## Viz 8 - Breakdown of MNC by sector (pie chart - changed to bar chart for more visibility) + +# Viz 8 : Breakdown of MNC by sector # TODO add code -## Viz 9 - Breakdown of MNC by HQ country (pie chart - changed to bar chart for more visibility) + +# Viz 9 : Breakdown of MNC by HQ country # TODO add code -## Viz 10/11 - Breakdown of MNC by sector + +# Viz 10/11 : Breakdown of MNC by sector # TODO add code -## Viz 11 - Breakdown of MNC by HQ country + +# Viz 11 : Breakdown of MNC by HQ country # TODO add code -# Viz 12 - available reports by company -def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: - """Compute the number of reports tracked for a specific company and the - available fiscal years. + +# Viz 12 : available reports by company +def company_available_reports( + df: pd.DataFrame, company: str, hide_company: bool = True +) -> pd.DataFrame: + """Compute the number of reports tracked and the available fiscal years for a specific company. Args: df (pd.DataFrame): CbCRs database. - company (str): company name. + company (str): Company name. + hide_company (bool, optional): Hide company name in final table. Defaults to True. Returns: - dict: numbers of reports and fiscal years. + pd.DataFrame: numbers of reports and fiscal years in a table. """ - available_years = df.loc[df['mnc'] == company, 'year'].unique() + available_years = df.loc[df["mnc"] == company, "year"].unique() n_reports = len(available_years) # Convert type of items from 'int' to 'str' in available years list @@ -428,467 +335,342 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: if len(years_string_list) == 1: years_string = years_string_list[0] elif len(years_string_list) > 1: - years_string = ', '.join(years_string_list[:-1]) - years_string += ' and ' + years_string_list[-1] - - # Create a dictionnary with the results - data = { - 'Company': company, - 'Reports': n_reports, - 'Fiscal year(s) available': years_string - } - - return data - - -def display_company_available_reports( - df: pd.DataFrame, company: str, hide_company: bool = True) -> pd.DataFrame: - """Display the number of reports tracked for a specific company and the - available fiscal years. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): company name. - hide_company (bool, optional): hide company name in final table. Defaults to True. - - Returns: - pd.DataFrame: numbers of reports and fiscal years. - """ - - # Compute data - data = compute_company_available_reports(df=df, company=company) + years_string = ", ".join(years_string_list[:-1]) + years_string += " and " + years_string_list[-1] # Create the table - df = pd.DataFrame.from_dict(data=data, orient='index') + table = pd.DataFrame( + data=[company, n_reports, years_string], index=["Company", "Reports", "Fiscal year(s)"] + ) + # Hide the company name in the table if hide_company: - return df[1:].style.hide(axis='columns') + return table[1:].style.hide(axis="columns") - return df.style.hide(axis='columns') + return table.style.hide(axis="columns") -# Viz 13 - company key financials kpis -def compute_company_key_financials_kpis( - df: pd.DataFrame, - company: str, - year: int = None) -> dict: - """Compute key financial KPIs for a company. +# Viz 13 : Company key financials kpis +def company_key_financials_kpis(df: pd.DataFrame, company: str, year: int = None) -> dict: + """Compute key financial KPIs for a company in a table. Args: df (pd.DataFrame): CbCRs database. - company (str): Company name + company (str): company name. year (int, optional): fiscal year to filter the results with. Defaults to None. Returns: - dict: company key financial KPIs. + pd.DataFrame: table with company key financial KPIs. """ - kpis_list = ['total_revenues', 'unrelated_revenues', 'related_revenues', - 'profit_before_tax', 'tax_paid', 'employees'] + kpis_list = [ + "total_revenues", + "unrelated_revenues", + "related_revenues", + "profit_before_tax", + "tax_paid", + "employees", + ] - years_list = df.loc[df['mnc'] == company, 'year'].unique() + years_list = df.loc[df["mnc"] == company, "year"].unique() # Compute sum of kpis if not year or year not in years_list: - df = (df.loc[df['mnc'] == company] - .groupby(['year', 'upe_name'], as_index=False)[kpis_list] - .sum() - ) + df = ( + df.loc[df["mnc"] == company] + .groupby(["year", "upe_name"], as_index=False)[kpis_list] + .sum() + ) else: - df = (df.loc[(df['mnc'] == company) & (df['year'] == year)] - .groupby(['year', 'upe_name'], as_index=False)[kpis_list] - .sum()) - - # df = df.set_index('year') + df = ( + df.loc[(df["mnc"] == company) & (df["year"] == year)] + .groupby(["year", "upe_name"], as_index=False)[kpis_list] + .sum() + ) # Make financial numbers easily readable with 'humanize' package for column in df.columns: - if column not in ['employees', 'upe_name']: + if column not in ["employees", "upe_name"]: df[column] = df[column].apply( - lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x) - df[column] = '€ ' + df[column] - elif column == 'employees': + lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x + ) + df[column] = "€ " + df[column] + elif column == "employees": df[column] = df[column].astype(int) # Remove 'upe_name' and 'year'' - df = df.drop(columns=['upe_name', 'year']) + df = df.drop(columns=["upe_name", "year"]) # Clean columns string - df.columns = df.columns.str.replace('_', ' ').str.capitalize() - - # Create a dictionary with the results - data = df.to_dict(orient='index') - - return data - - -def display_company_key_financials_kpis( - df: pd.DataFrame, company: str, year: int = None): - """Display key financial KPIs for a company. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int, optional): fiscal year to filter the results with. Defaults to None. - - Returns: - pd.DataFrame: company key financial KPIs. - """ + df.columns = df.columns.str.replace("_", " ").str.capitalize() - # Compute data - data = compute_company_key_financials_kpis(df=df, company=company, year=year) - - # Create the table - df = pd.DataFrame.from_dict(data) - df = df.reset_index() + # Transpose DataFrame + df = df.T.reset_index() # Rename columns - df = df.rename(columns={'index': 'Variable', 0: 'Value'}) + df = df.rename(columns={"index": "Variable", 0: "Value"}) # Replace 0 values with 'N/A' - df.loc[df['Value'] == '€ 0', 'Value'] = 'N/A' + df.loc[df["Value"] == "€ 0", "Value"] = "N/A" return df -# Viz 14 -def compute_top_jurisdictions_revenue( - df: pd.DataFrame, company: str, year: int) -> dict: - """Rank jurisdictions on their percentage of total revenues. +# Viz 14 : company top jurisdictions for revenue +def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.Figure: + """Compute and plot top jurisdictions on their percentage of total revenues. Args: df (pd.DataFrame): CbCRs database. company (str): Company name - year (int): fiscal year. + year (int): Fiscal year. Returns: - dict: Rank of jurisdictions by percentage of total revenues. + go.Figure: Jurisdictions by percentage of total revenues in a Plotly figure. """ df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['jur_name', 'related_revenues', 'unrelated_revenues', 'total_revenues'] + (df["mnc"] == company) & (df["year"] == year), + ["jur_name", "related_revenues", "unrelated_revenues", "total_revenues"], ] # Calculate missing values in 'total_revenues' if 'related_revenues' and # 'unrelated_revenues' are available df.loc[ - df['related_revenues'].notna() - & df['unrelated_revenues'].notna() - & df['total_revenues'].isna(), - 'total_revenues' - ] = df['related_revenues'] + df['unrelated_revenues'] + df["related_revenues"].notna() + & df["unrelated_revenues"].notna() + & df["total_revenues"].isna(), + "total_revenues", + ] = df["related_revenues"] + df["unrelated_revenues"] # Subset DataFrame - df = df[['jur_name', 'total_revenues']] + df = df[["jur_name", "total_revenues"]] # Remove rows where 'total_revenues' is missing - df = df.dropna(subset=['total_revenues']) + df = df.dropna(subset=["total_revenues"]) # Compute percentage of revenue - df['total_revenues_%'] = df['total_revenues'] / df['total_revenues'].sum() - - # Convert DataFrame to dictionnary - data = df.to_dict() - - return data - - -def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int): - """Display jurisdictions by percentage of total revenues in an - horizontal bar chart. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year. - """ - - # Compute data - data = compute_top_jurisdictions_revenue(df=df, company=company, year=year) - - # Create DataFrame - df = pd.DataFrame.from_dict(data) - df = df.sort_values(by='total_revenues_%') + df["total_revenues_%"] = df["total_revenues"] / df["total_revenues"].sum() - # Bar color sequence - bar_color = '#D9D9D9' + # Sort jurisdictions by percentage of total revenues + df = df.sort_values(by="total_revenues_%") # Create figure fig = px.bar( df, - x='total_revenues_%', - y='jur_name', - orientation='h', - color_discrete_sequence=[bar_color], - text_auto='.1%' + x="total_revenues_%", + y="jur_name", + orientation="h", + text_auto=".1%", + color_discrete_sequence=COLOR_SEQUENCE, ) # Set figure height (min. 480) depending on the number of jurisdictions - fig_height = max(480, (48 * len(df['jur_name']))) + fig_height = max(480, (48 * len(df["jur_name"]))) + + # Update axis layout + fig.update_xaxes(title="Percentage of total revenue", tickformat=".0%") + fig.update_yaxes(title=None) # Update layout settings - fig.update_layout( - font_family='Roboto', - xaxis=dict( - title='Percentage of total revenue', - tickformat='.0%' - ), - yaxis_title=None, - plot_bgcolor='white', - height=fig_height, - margin=dict(l=0, r=0, t=0, b=0) - ) + fig.update_layout(template=custom_template, height=fig_height) # Define position of text values values_positions = [ - 'outside' if value <= 0.05 else 'inside' for value in df['total_revenues_%']] + "outside" if value <= 0.05 else "inside" for value in df["total_revenues_%"] + ] - fig.update_traces( - textangle=0, - textposition=values_positions, - selector=dict(name='') - ) + fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name="")) # Define style of hover on bars fig.update_traces( - hovertemplate=( - "%{hovertext}

% revenue: %{x:.3%}
" - ), - hovertext=df['jur_name'] + hovertemplate=("%{hovertext}

% revenue: %{x:.3%}
"), + hovertext=df["jur_name"], ) return go.Figure(fig) -# Viz 15 -def compute_pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int) -> pd.DataFrame: - """Compute jurisdictions percentage of profit before tax and percentage - of employees and rank by percentage of profit. +# Viz 15 : company’s % pre-tax profit and % employees by jurisdiction +def pretax_profit_and_employees_rank(df: pd.DataFrame, company: str, year: int) -> go.Figure: + """Compute and plot jurisdictions percentage of profit before tax and percentage of employees then rank by + percentage of profit. Args: df (pd.DataFrame): CbCRs database. company (str): Company name - year (int): fiscal year. + year (int): Fiscal year. Returns: - dict: rank of jurisdictions with percentage of profit before and percentage - of employees. + go.Figure:: rank of jurisdictions with percentage of profit before and percentage of employees in a Plotly + figure. """ # Filter rows with selected company/year and subset with necessary features - features = ['jur_name', 'profit_before_tax', 'employees'] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + features = ["jur_name", "profit_before_tax", "employees"] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Keep only profitable jurisdictions - df = df.loc[df['profit_before_tax'] >= 0] + df = df.loc[df["profit_before_tax"] >= 0] # Sort jurisdictions by profits - df = df.sort_values(by='profit_before_tax').reset_index(drop=True) + df = df.sort_values(by="profit_before_tax").reset_index(drop=True) # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() - df['employees_%'] = df['employees'] / df['employees'].sum() - df = df.drop(columns=['profit_before_tax', 'employees']) - - # data = df.to_dict() - - return df - - -def display_pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int) -> go.Figure: - """Display rank of jurisdictions by percentage of profit before and percentage - of employees. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year. - """ - - # Compute data - df = compute_pretax_profit_and_employees_rank(df=df, company=company, year=year) - - # Create DataFrame - # df = pd.DataFrame(data) + df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum() + df["employees_%"] = df["employees"] / df["employees"].sum() + df = df.drop(columns=["profit_before_tax", "employees"]) # Rename columns - df = df.rename(columns={ - 'profit_before_tax_%': '% profit', - 'employees_%': '% employees' - }) - - # Bar color sequence - bar_colors = ['#D9D9D9', '#1E2E5C'] + df = df.rename(columns={"profit_before_tax_%": "% profit", "employees_%": "% employees"}) # Create figure fig = px.bar( df, - x=['% employees', '% profit'], - y='jur_name', - barmode='group', - orientation='h', - text_auto='.1%', - color_discrete_sequence=bar_colors + x=["% employees", "% profit"], + y="jur_name", + barmode="group", + orientation="h", + text_auto=".1%", + color_discrete_sequence=COLOR_SEQUENCE, ) # Set figure height (min. 640) depending on the number of jurisdictions - fig_height = max(480, (48 * len(df['jur_name']))) + fig_height = max(480, (48 * len(df["jur_name"]))) # Set maximum value for x axis - if not df[['% profit', '% employees']].isna().all().all(): - max_x_value = max(df[['% profit', '% employees']].max(axis='columns')) + 0.1 + if not df[["% profit", "% employees"]].isna().all().all(): + max_x_value = max(df[["% profit", "% employees"]].max(axis="columns")) + 0.1 else: max_x_value = 1 + # Update axis layout + fig.update_xaxes(title=None, tickformat=".0%", range=[0, max_x_value]) + fig.update_yaxes(title=None) + # Update layout settings fig.update_layout( - font_family='Roboto', - title=None, - xaxis=dict( - title=None, - tickformat='.0%', - range=[0, max_x_value] - ), - yaxis_title=None, legend=dict( - x=0.1, - y=1.05, - xanchor='center', - yanchor='top', - title=dict(text=''), - orientation='h' + orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="") ), - plot_bgcolor='white', + template=custom_template, height=fig_height, - margin=dict(l=0, r=0, t=10, b=0) ) # Add annotations for NaN values where there should have been a bar for index, row in df.iterrows(): - if pd.isna(row['% employees']): + if pd.isna(row["% employees"]): fig.add_annotation( - xanchor='left', + xanchor="left", x=0.001, y=df.index[index], yshift=-10, - text='Information not provided', + text="Information not provided", showarrow=False, - font=dict(size=12) + font=dict(size=12), ) - if pd.isna(row['% profit']): + if pd.isna(row["% profit"]): fig.add_annotation( - xanchor='left', + xanchor="left", x=0.001, y=df.index[index], yshift=10, - text='Information not provided', + text="Information not provided", showarrow=False, - font=dict(size=12) + font=dict(size=12), ) # Loop through each bar trace and hide the text if the value is NaN for trace in fig.data: values = df[trace.name] - text_position = ['outside' if not np.isnan(value) else 'none' for value in values] + text_position = ["outside" if not np.isnan(value) else "none" for value in values] trace.textposition = text_position - if trace.name == '% employees': - trace.hovertemplate = '%{y}

Employees : %{x:.3%}' - elif trace.name == '% profit': - trace.hovertemplate = '%{y}

Profit : %{x:.3%}' + if trace.name == "% employees": + trace.hovertemplate = "%{y}

Employees : %{x:.3%}" + elif trace.name == "% profit": + trace.hovertemplate = "%{y}

Profit : %{x:.3%}" return go.Figure(fig) -# Viz 16 -def compute_pretax_profit_and_profit_per_employee( - df: pd.DataFrame, company: str, year: int) -> pd.DataFrame: +# Viz 16 : company’s % pre-tax profit and profit per employee +def pretax_profit_and_profit_per_employee( + df: pd.DataFrame, company: str, year: int +) -> go.Figure: + """Compute and plot jurisdictions percentage of profit before tax and profit by employee. + + Args: + df (pd.DataFrame): CbCRs database. + company (str): Company name + year (int): Fiscal year. + + Returns: + go.Figure: Percentage of profit and profit/employee in a Plotly Figure. + """ + # Filter rows with selected company/year and subset with necessary features - features = ['jur_name', 'profit_before_tax', 'employees', 'jur_tax_haven'] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + features = ["jur_name", "profit_before_tax", "employees", "jur_tax_haven"] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Keep only profitable jurisdictions - df = df.loc[df['profit_before_tax'] >= 0] + df = df.loc[df["profit_before_tax"] >= 0] # Sort jurisdictions by profits - df = df.sort_values(by='profit_before_tax').reset_index(drop=True) + df = df.sort_values(by="profit_before_tax").reset_index(drop=True) # Replace 0 employees by 1 - df.loc[df['employees'] == 0, 'employees'] = 1 + df.loc[df["employees"] == 0, "employees"] = 1 # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() - df['profit_per_employee'] = df['profit_before_tax'] / df['employees'] - df = df.drop(columns=['profit_before_tax', 'employees']) - - # print('compute_pretax_profit_and_profit_per_employee df.head():\n', df.head()) - # data = df.to_dict() - - return df - - -def display_pretax_profit_and_profit_per_employee(df: pd.DataFrame, company: str, year: int) -> go.Figure: - # Compute data - df = compute_pretax_profit_and_profit_per_employee(df=df, company=company, year=year) - - # Create DataFrame - # df = pd.DataFrame(data) + df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum() + df["profit_per_employee"] = df["profit_before_tax"] / df["employees"] + df = df.drop(columns=["profit_before_tax", "employees"]) # Replace bool values of Tax haven by string values - df['jur_tax_haven'] = df['jur_tax_haven'].map({True: 'Tax haven', False: 'Non tax haven'}) - + df["jur_tax_haven"] = df["jur_tax_haven"].map({True: "Tax haven", False: "Non tax haven"}) + # Create figure fig = px.scatter( df, - x='profit_before_tax_%', - y='profit_per_employee', - size='profit_before_tax_%', - color='jur_tax_haven', + x="profit_before_tax_%", + y="profit_per_employee", + size="profit_before_tax_%", + color="jur_tax_haven", color_discrete_sequence=COLOR_SEQUENCE, - custom_data=['jur_name'] + custom_data=["jur_name"], ) + # Update axis layout + fig.update_xaxes(title="Percentage of profit", tickformat=".0%") + fig.update_yaxes(title="Profit per employee") + # Update layout settings fig.update_layout( - title=None, - font_family='Roboto', - autosize=True, - height=360, - xaxis=dict( - title='% profit', - tickformat='.0%', - ), - yaxis=dict( - title='Profit/employee', - ), legend=dict( - x=0.1, - y=1.05, - xanchor='center', - yanchor='top', - title=dict(text=''), - orientation='h'), - plot_bgcolor='white', - margin=dict(l=0, r=0, t=0, b=0) + orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="") + ), + template=custom_template, + height=380, ) - - + # Define hover fig.update_traces( hovertemplate=f"{company} reports %{{x:.1%}} of profit and %{{y:.3s}}€ profits per employee in %{{customdata[0]}}" ) - + return go.Figure(fig) -# Viz 18 +# Viz 17 : company’s % pre-tax profit and % employees in TH vs domestic vs non TH +# TODO add code + -def compute_related_and_unrelated_revenues_breakdown( - df: pd.DataFrame, company: str, year: int) -> dict: - """Compute related and unrelated revenues in tax heaven, non tax heaven and +# Viz 18 : breakdown of revenue between related party and unrelated party in TH vs domestic vs non TH +def related_and_unrelated_revenues_breakdown( + df: pd.DataFrame, company: str, year: int +) -> go.Figure: + """Compute and plot related and unrelated revenues in tax heaven, non tax heaven and domestic jurisdictions. Args: @@ -897,101 +679,75 @@ def compute_related_and_unrelated_revenues_breakdown( year (int): fiscal year to filter the results with. Returns: - dict: revenues percentage for different type of jurisdictions. + go.Figure: related and unrelated revenues in a Plotly Figure. """ # Filter rows with selected company/year and subset with necessary features - features = ['upe_code', 'jur_code', 'jur_name', 'jur_tax_haven', - 'unrelated_revenues', 'related_revenues'] + features = [ + "upe_code", + "jur_code", + "jur_name", + "jur_tax_haven", + "unrelated_revenues", + "related_revenues", + ] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Drop rows where either unrelated or related revenues are missing - df = df.dropna(subset=['unrelated_revenues', 'related_revenues']) + df = df.dropna(subset=["unrelated_revenues", "related_revenues"]) # 'total_revenues' is recreated using related and unrelated revenues since the one # reported by companies is not always reliable - df['total_revenues'] = df['unrelated_revenues'] + df['related_revenues'] + df["total_revenues"] = df["unrelated_revenues"] + df["related_revenues"] # Create a column to check if 'jur_code' is the domestic country - df['domestic'] = df.apply(lambda row: row['jur_code'] == row['upe_code'], axis='columns') + df["domestic"] = df.apply(lambda row: row["jur_code"] == row["upe_code"], axis="columns") # Compute kpis in a new DataFrame data = pd.DataFrame() - data['tax_haven'] = df.loc[df['jur_tax_haven'] == True, ['unrelated_revenues', 'related_revenues']].sum() - data['non_tax_haven'] = df.loc[df['jur_tax_haven'] == False, ['unrelated_revenues', 'related_revenues']].sum() - data['domestic'] = df.loc[df['domestic'] == True, ['unrelated_revenues', 'related_revenues']].sum() + data["tax_haven"] = df.loc[ + df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"] + ].sum() + data["non_tax_haven"] = df.loc[ + ~df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"] + ].sum() + data["domestic"] = df.loc[df["domestic"], ["unrelated_revenues", "related_revenues"]].sum() # Replace values with share (%) of 'unrelated/related revenues' - data = data.div(data.sum(axis='rows'), axis='columns') + data = data.div(data.sum(axis="rows"), axis="columns") # Rename indexes - data = data.rename(index={ - 'unrelated_revenues': 'unrelated_revenues_percentage', - 'related_revenues': 'related_revenues_percentage' - }) - - # Convert DataFrame to dictionary - data = data.to_dict() - - return data - - -def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: str, year: int) -> tuple[pd.DataFrame, go.Figure]: - """Display related and unrelated revenues in tax heaven, non tax heaven and - domestic jurisdictions. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year to filter the results with. - """ - - # Compute data - data = compute_related_and_unrelated_revenues_breakdown(df=df, company=company, year=year) - - # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') - - # Rename columns and indexes - df.columns = df.columns.str.replace('_', ' ').str.capitalize() - df.index = df.index.str.replace('_', ' ').str.capitalize() + data = data.rename( + index={ + "unrelated_revenues": "Unrelated revenues", + "related_revenues": "Related revenues", + } + ) # Create figure fig = px.bar( df, - x=['Unrelated revenues percentage', 'Related revenues percentage'], + x=["Unrelated revenues", "Related revenues"], y=df.index, - orientation='h', - text_auto='.0%' + orientation="h", + text_auto=".0%", ) + # Update axis layout + fig.update_xaxes(title=None, tickformat=".0%") + fig.update_yaxes(title=None) + # Update layout settings fig.update_layout( - title='Breakdown of revenue', - xaxis=dict( - title=None, - tickformat='.0%' - ), - yaxis_title=None, - legend=dict( - title=dict(text=''), - orientation='h' - ), - plot_bgcolor='white', - width=800, - height=480 + legend=dict(title=dict(text=""), orientation="h"), template=custom_template ) # Define position of text values - for col in ['Unrelated revenues percentage', 'Related revenues percentage']: - values_positions = ['outside' if value <= 0.05 else 'inside' for value in df[col]] + for col in ["Unrelated revenues", "Related revenues"]: + values_positions = ["outside" if value <= 0.05 else "inside" for value in df[col]] - fig.update_traces( - textangle=0, - textposition=values_positions, - selector=dict(name=col) - ) + fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name=col)) # Add annotation if no values are availables (no bar displayed) for i, index in enumerate(df.index): @@ -999,17 +755,101 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: fig.add_annotation( x=0.5, y=df.index[i], - text='No information to display', + text="No information to display", showarrow=False, - font=dict(size=13) + font=dict(size=13), ) - # fig.show() - return pd.DataFrame.from_dict(data, orient='index'), go.Figure(fig) + return go.Figure(fig) -# Viz 21 - evolution of tax havens use over time : % profit vs % employees in TH over time -def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: +# Viz 19 : what are the tax havens being used by the company +def tax_haven_used_by_company(df_selected_company): + company_upe_code = df_selected_company["upe_code"].unique()[0] + pc_list = ["employees", "profit_before_tax", "related_revenues"] + # grouper = df_selected_company.groupby('jur_name') + + df = pd.DataFrame(df_selected_company) + + df_domestic_company = df[df["jur_code"] == company_upe_code] + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_nth = df[df["jur_tax_haven"] == "not.TH"] + + for col in pc_list: + df.insert( + len(df_selected_company.columns), + col + "_domestic_sum", + df_domestic_company[col].sum(), + ) + + df.insert( + len(df_selected_company.columns), col + "_th_sum", df_selected_company_th[col].sum() + ) + + df.insert(len(df.columns), col + "_nth_sum", df_selected_company_nth[col].sum()) + + df.insert(len(df.columns), col + "_sum", df_selected_company[col].sum()) + + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) + # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum'] + + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_th_agg = df_selected_company_th.groupby(["mnc", "jur_name"]).agg( + profit_before_tax=("profit_before_tax", "sum"), + profit_before_tax_pc=("profit_before_tax_pc", "sum"), + employees_pc=("employees_pc", "sum"), + employees=("employees", "sum"), + related_revenues_pc=("related_revenues_pc", "sum"), + ) + df_selected_company_th_agg = df_selected_company_th_agg.reset_index() + df_selected_company_th_agg["profit per employee"] = ( + df_selected_company_th_agg["profit_before_tax"] + / df_selected_company_th_agg["employees"] + ) + df_selected_company_th_agg["profit per employee"] = df_selected_company_th_agg[ + "profit per employee" + ].replace([np.inf, -np.inf], None) + + return df_selected_company, df_selected_company_th_agg + + +# Viz 20 : complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for +# each (color code for tax havens) +def company_table(df_selected_company): + # company_upe_code = df_selected_company['upe_code'].unique()[0] + pc_list = [ + "employees", + "profit_before_tax", + "unrelated_revenues", + "related_revenues", + "total_revenues", + "tax_paid", + ] + + df = pd.DataFrame(df_selected_company) + for col in pc_list: + if col + "_sum" not in df.columns: + df.insert(len(df.columns), col + "_sum", df[col].sum()) + + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) + # f_selected_company[col + '_sum'] = df_selected_company[col].sum() + # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum'] + + # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) + df_selected_company_by_jur = df.groupby(["mnc", "jur_name"]).agg( + related_revenues_pc=("related_revenues_pc", "sum"), + unrelated_revenues=("unrelated_revenues", "sum"), + total_revenues=("total_revenues", "sum"), + profit_before_tax=("profit_before_tax", "sum"), + employees_pc=("employees_pc", "sum"), + tax_paid=("tax_paid", "sum"), + tax_paid_pc=("tax_paid_pc", "sum"), + ) + return df_selected_company_by_jur.reset_index() + + +# Viz 21 : evolution of tax havens use over time : % profit vs % employees in TH over time +def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure: """Compute the evolution of tax havens use by company over time. Args: @@ -1017,121 +857,100 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: company (str): Company name Returns: - dict: tax havens percentage of profits and employees for each year. + go.Figure: tax havens use evolution in a Plotly Figure. """ # Filter rows with selected company and subset with necessary features - features = ['jur_code', 'year', 'jur_tax_haven', 'profit_before_tax', 'employees'] - df = df.loc[(df['mnc'] == company), features] + features = ["jur_code", "year", "jur_tax_haven", "profit_before_tax", "employees"] + df = df.loc[(df["mnc"] == company), features] # Keep jurisdictions with profitable or missing revenues - df = df.loc[(df['profit_before_tax'] >= 0) | (df['profit_before_tax'].isna())] + df = df.loc[(df["profit_before_tax"] >= 0) | (df["profit_before_tax"].isna())] # For all sum calculations below : # - Result NA : all jurisdictions values were NA ; # - Result 0 : at least one jurisdiction was reported as 0. # Calculate total profit and employees by year and tax haven status - df = df.groupby(['year', 'jur_tax_haven'], as_index=False)[['profit_before_tax', 'employees']].sum(min_count=1) + df = df.groupby(["year", "jur_tax_haven"], as_index=False)[ + ["profit_before_tax", "employees"] + ].sum(min_count=1) # Calculate total profits and employees for each year - for year in df['year'].unique(): - df.loc[df['year'] == year, 'total_profit'] = df.loc[df['year'] == year, 'profit_before_tax'].sum(min_count=1) - df.loc[df['year'] == year, 'total_employees'] = df.loc[df['year'] == year, 'employees'].sum(min_count=1) + for year in df["year"].unique(): + df.loc[df["year"] == year, "total_profit"] = df.loc[ + df["year"] == year, "profit_before_tax" + ].sum(min_count=1) + df.loc[df["year"] == year, "total_employees"] = df.loc[ + df["year"] == year, "employees" + ].sum(min_count=1) # Remove non tax haven jurisdictions - df = df.loc[df['jur_tax_haven'] == True].reset_index() + df = df.loc[df["jur_tax_haven"] == True].reset_index() # Calculate percentages - df['tax_havens_profit_%'] = df['profit_before_tax'] / df['total_profit'] - df['tax_havens_employees_%'] = df['employees'] / df['total_employees'] - - # Convert necessary data to dictionnary - data = df[['year', 'tax_havens_profit_%', 'tax_havens_employees_%']].to_dict() - - return data - - -def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): - """Display the evolution of tax havens use by company over time. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - """ - - # Compute data - data = compute_tax_havens_use_evolution(df=df, company=company) - - # Create DataFrame - df = pd.DataFrame.from_dict(data) + df["tax_havens_profit_%"] = df["profit_before_tax"] / df["total_profit"] + df["tax_havens_employees_%"] = df["employees"] / df["total_employees"] # Rename columns - df = df.rename(columns={ - 'tax_havens_profit_%': 'Percentage of profits in tax havens', - 'tax_havens_employees_%': 'Percentage of employees in tax havens' - }) + df = df.rename( + columns={ + "tax_havens_profit_%": "Percentage of profits in tax havens", + "tax_havens_employees_%": "Percentage of employees in tax havens", + } + ) # Create figure fig = px.bar( df, - x='year', - y=['Percentage of profits in tax havens', 'Percentage of employees in tax havens'], - barmode='group', - text_auto='.1%' + x="year", + y=["Percentage of profits in tax havens", "Percentage of employees in tax havens"], + barmode="group", + text_auto=".1%", ) + # Update axis layout + fig.update_xaxes(title=None) + fig.update_yaxes(title=None, tickformat=".0%") # Update layout settings fig.update_layout( - title='Tax havens use in profitables jurisdictions', - xaxis_title=None, - yaxis_title=None, - yaxis_tickformat='.0%', - legend=dict( - title=dict(text=''), - orientation='h' - ), - plot_bgcolor='white', - width=800, - height=480 + legend=dict(title=dict(text=""), orientation="h"), template=custom_template ) - # fig.show() return go.Figure(fig) -# Viz 24 -def compute_number_of_tracked_mnc_available(df) -> dict: - # Drop duplicates to ensure each MNC appears only once per year - df_unique_mnc = df.drop_duplicates(subset=['year', 'mnc']) +# Viz 22 : locations of profits booked vs. mean 3Y ETR +# TODO add code - # Group the DataFrame by 'mnc' and count the number of reports for each MNC - df_reports_per_mnc = df_unique_mnc.groupby('mnc').size().reset_index(name='report_count') - # Convert the DataFrame to a dictionary where MNCs are keys and report counts are values - mnc_report_count = dict(zip(df_reports_per_mnc['mnc'], df_reports_per_mnc['report_count'], strict=False)) +# Viz 24 : mnc tracked +def mnc_tracked(df: pd.DataFrame) -> go.Figure: + """Compute and plot the list of company name in a word cloud where the size of the font depends of the number + of reports available. - return mnc_report_count + Args: + df (pd.DataFrame): CbCRs database. + Returns: + go.Figure: word cloud with company name in a Plotly figure. + """ -def display_number_of_tracked_mnc_available(df) -> go.Figure: - mnc_report_count = compute_number_of_tracked_mnc_available(df=df) + # Create dictionnary with company name as key and the number of reports as value + data = df.groupby("mnc")["year"].nunique().to_dict() color_func = get_single_color_func("#B8BEDB") # Generate the word cloud using the report counts as weights wordcloud = WordCloud( - width=1200, - height=800, - background_color='white', - color_func=color_func - ).generate_from_frequencies(mnc_report_count) + width=1200, height=800, background_color="white", color_func=color_func + ).generate_from_frequencies(data) # Display the word cloud fig = px.imshow(wordcloud) # Remove hover on image - fig.update_traces(hoverinfo='skip', hovertemplate='') + fig.update_traces(hoverinfo="skip", hovertemplate="") # Remove colorbar fig.update_layout(coloraxis_showscale=False) @@ -1146,20 +965,20 @@ def display_number_of_tracked_mnc_available(df) -> go.Figure: return go.Figure(fig) -# Viz 25 +# Viz 25 : company’s average transparency score # List financial columns financial_columns = [ - 'total_revenues', - 'profit_before_tax', - 'tax_paid', - 'tax_accrued', - 'unrelated_revenues', - 'related_revenues', - 'stated_capital', - 'accumulated_earnings', - 'tangible_assets', - 'employees' + "total_revenues", + "profit_before_tax", + "tax_paid", + "tax_accrued", + "unrelated_revenues", + "related_revenues", + "stated_capital", + "accumulated_earnings", + "tangible_assets", + "employees", ] @@ -1177,12 +996,12 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1197,8 +1016,8 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float # Calculate percentage of each financial value where jurisdiction is 'OTHER' # Percentage = 1. Total of 'OTHER' row(s) / 2. Total of all rows other_percentage = ( - df.loc[df['jur_code'] == 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df["jur_code"] == "OTHER", financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate geographic score @@ -1222,12 +1041,12 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1243,7 +1062,7 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo score = len(financial_columns_left) - for variable in ['profit_before_tax', 'tax_paid']: + for variable in ["profit_before_tax", "tax_paid"]: if variable in df.columns: score += 1 @@ -1267,12 +1086,12 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1287,8 +1106,8 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo # Calculate percentage of each financial value where jurisdiction is not 'OTHER' # Percentage = 1. Total of not 'OTHER' row(s) / 2. Total of all rows not_other_percentage = ( - df.loc[df['jur_code'] != 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df["jur_code"] != "OTHER", financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate transparency score @@ -1310,7 +1129,7 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict: """ # List all years when the company as reported - years_list = sorted(df.loc[df['mnc'] == company, 'year'].unique()) + years_list = sorted(df.loc[df["mnc"] == company, "year"].unique()) # Initialize an empty dictionary data = dict() @@ -1323,17 +1142,16 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict: transparency_score = compute_transparency_score(df=df, company=company, year=year) data[year] = { - 'mnc': company, - 'geographic_score': geographic_score, - 'completeness_score': completeness_score, - 'transparency_score': transparency_score + "mnc": company, + "geographic_score": geographic_score, + "completeness_score": completeness_score, + "transparency_score": transparency_score, } return data -def transparency_scores_to_csv( - df: pd.DataFrame, csv_path: str = './') -> pd.DataFrame: +def transparency_scores_to_csv(df: pd.DataFrame, csv_path: str = "./") -> pd.DataFrame: """Compute transparency score for all companies and all years into a DataFrame and export it to a csv file (optional). @@ -1346,30 +1164,29 @@ def transparency_scores_to_csv( """ # List all companies - mnc_list = df['mnc'].unique() + mnc_list = df["mnc"].unique() # Initialize an empty DataFrame mnc_df = pd.DataFrame() # Calculate transparency scores for all companies and add them to the DataFrame for mnc in mnc_list: - temp_df = pd.DataFrame.from_dict( - compute_all_scores(df=df, company=mnc), orient='index') + temp_df = pd.DataFrame.from_dict(compute_all_scores(df=df, company=mnc), orient="index") mnc_df = pd.concat([mnc_df, temp_df]) # Reset index and move 'mnc' columns in first position - mnc_df = mnc_df.reset_index().rename(columns={'index': 'year'}) - mnc_df.insert(0, 'mnc', mnc_df.pop('mnc')) + mnc_df = mnc_df.reset_index().rename(columns={"index": "year"}) + mnc_df.insert(0, "mnc", mnc_df.pop("mnc")) if csv_path: - mnc_df.to_csv(csv_path + 'transparency_scores.csv', index=False) + mnc_df.to_csv(csv_path + "transparency_scores.csv", index=False) return mnc_df -def display_transparency_score(df: pd.DataFrame, company: str, year: int = None): - """Display transparency score for specific company in a metric. +def transparency_score(df: pd.DataFrame, company: str, year: int = None): + """Compute transparency score for specific company in a metric. Args: df (pd.DataFrame): CbCRs database. @@ -1381,51 +1198,25 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None) data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Reset index and move 'mnc' columns in first position - df = df.reset_index().rename(columns={'index': 'year'}) + df = df.reset_index().rename(columns={"index": "year"}) # When data are not filtered by year, the score is the average of all years score = round( - df.loc[df['year'] == year, 'transparency_score'].iloc[0] if year - else df['transparency_score'].mean(), - 0 - ) - - # Create figure - fig = go.Figure() - - # Add circular background - fig.add_shape( - type='circle', - x0=0, y0=0, x1=1, y1=1, - line_color='blue', - fillcolor='blue', - opacity=0.3 + df.loc[df["year"] == year, "transparency_score"].iloc[0] + if year + else df["transparency_score"].mean(), + 0, ) - # Add indicator - fig.add_trace(go.Indicator( - mode='number', - value=score, - number={'suffix': '%', 'valueformat': '.0f', 'font': {'size': 54}}, - domain={'x': [0, 1], 'y': [0, 1]} - )) - - # Update layout - fig.update_layout( - width=360, - height=360) - return score -# Viz 26 - +# Viz 26 : company’s transparency score over time + details for each component of the score # Functions below use the same computation function (compute_all_scores) as used for Viz 25. - -def display_transparency_score_over_time(df: pd.DataFrame, company: str): +def transparency_score_over_time(df: pd.DataFrame, company: str): """Display transparency scores over time for a specific company in a bar chart. @@ -1438,82 +1229,76 @@ def display_transparency_score_over_time(df: pd.DataFrame, company: str): data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Reset index and move 'mnc' columns in first position - df = df.reset_index().rename(columns={'index': 'year'}) + df = df.reset_index().rename(columns={"index": "year"}) # Create figure - fig = px.bar(df, x='year', y='transparency_score', - text_auto='.0f' - ) + fig = px.bar(df, x="year", y="transparency_score", text_auto=".0f") # Update layout settings fig.update_layout( - title='Transparency score over time', - xaxis=dict( - title=None, - tickvals=df['year'].unique() - ), + template=custom_template, + xaxis=dict(title=None, tickvals=df["year"].unique()), yaxis=dict( title=None, showline=True, - ticks='outside', - linecolor='grey', - tickcolor='grey', + ticks="outside", + linecolor="grey", + tickcolor="grey", range=[0, 101], tickvals=[0, 25, 50, 75, 100], - ticktext=[0, '', '', '', 100] + ticktext=[0, "", "", "", 100], ), - plot_bgcolor='white', - width=800, - height=480 ) # Force position and color of bar values - fig.update_traces( - textposition='outside', textfont=dict(color=fig.data[0].marker.color) - ) + fig.update_traces(textposition="outside", textfont=dict(color=fig.data[0].marker.color)) fig.show() -def display_transparency_score_over_time_details( - df: pd.DataFrame, company: str) -> pd.DataFrame: - """Display details of components of transparency scores over time - for a specific company in a table. +def transparency_scores_over_time_details(df: pd.DataFrame, company: str) -> pd.DataFrame: + """Compute all geographic, completeness and general transparency scores over time for a specific company in a table. Args: df (pd.DataFrame): CbCRs database. company (str): Company name. Returns: - pd.DataFrame: Table with details of components over years. + pd.DataFrame: Table with details of scores over years. """ # Compute data data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Drop 'mnc' column - df = df.drop(columns='mnc') + df = df.drop(columns="mnc") # Round and convert percentage to string with '/100' annotation - df = df.apply(lambda x: round(x).astype(int).astype('string') + '/100') + df = df.apply(lambda x: round(x).astype(int).astype("string") + "/100") # Reset index and rename 'year' column - df = df.reset_index().rename(columns={'index': 'Fiscal year'}) + df = df.reset_index().rename(columns={"index": "Fiscal year"}) # Move 'transparency_score' before other score columns - df.insert(1, 'transparency_score', df.pop('transparency_score')) + df.insert(1, "transparency_score", df.pop("transparency_score")) # Rename columns - df = df.rename(columns={ - 'geographic_score': 'Score on geographical disaggretion', - 'completeness_score': 'Score on variable exhaustiveness', - 'transparency_score': 'Transparency score', - }) + df = df.rename( + columns={ + "geographic_score": "Score on geographical disaggretion", + "completeness_score": "Score on variable exhaustiveness", + "transparency_score": "Transparency score", + } + ) return df + + +# Viz 27 : average transparency score over time +# TODO add code diff --git a/app/pages/company/company.md b/app/pages/company/company.md index bc34ce9..a6823f7 100644 --- a/app/pages/company/company.md +++ b/app/pages/company/company.md @@ -129,15 +129,15 @@ Financial profile <|part|class_name=viz-container| <|{viz["fin_key_financials_kpis"].title}|text|class_name=text-weight400|>
-<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300 text-transparent|> +<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300|> <|{viz["fin_key_financials_kpis"].data}|table|show_all|sortable=False|dynamic=True|style=table-cell|class_name=rows-similar table-top|> |> <|part|class_name=viz-container| -<|{viz["fin_jurisdictions_top_revenue"].title}|text|class_name=text-weight400|> +<|{viz["fin_top_jurisdictions_revenue"].title}|text|class_name=text-weight400|>
-<|{viz["fin_jurisdictions_top_revenue"].sub_title}|text|class_name=text-small text-weight300|> -<|chart|figure={viz["fin_jurisdictions_top_revenue"].fig}|> +<|{viz["fin_top_jurisdictions_revenue"].sub_title}|text|class_name=text-small text-weight300|> +<|chart|figure={viz["fin_top_jurisdictions_revenue"].fig}|> |> |> diff --git a/app/pages/company/company.py b/app/pages/company/company.py index 6871428..0ce5b11 100644 --- a/app/pages/company/company.py +++ b/app/pages/company/company.py @@ -35,7 +35,7 @@ "fin_transparency_score", "fin_transparency_score_over_time_details", "fin_key_financials_kpis", - "fin_jurisdictions_top_revenue", + "fin_top_jurisdictions_revenue", "fin_pretax_profit_and_employees_rank", "fin_pretax_profit_and_profit_per_employee", } @@ -92,8 +92,8 @@ def update_state(state: State): # print(f'company state selected_year:{state.selected_year}') # Calculate number of reports for all companies - state.df_count_company = algo.number_of_tracked_reports_over_time_company( - state.df_selected_company + state.df_count_company = algo.number_of_tracked_reports( + state.df_selected_company, "mnc", state.selected_company ) # print(f'company state df_count_company:{state.df_count_company.head()}') @@ -128,7 +128,9 @@ def update_viz_company(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.number_of_tracked_reports_company(state.df_selected_company), + data=algo.number_of_tracked_reports( + state.df_selected_company, "mnc", state.selected_company + ), title="Number of reports", ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') @@ -137,7 +139,7 @@ def update_viz_company(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score(state.data, state.selected_company), + data=algo.transparency_score(state.data, state.selected_company), title="Transparency Score", sub_title="average over all reports", ).to_state() @@ -163,7 +165,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score( + data=algo.transparency_score( state.data, state.selected_company, int(state.selected_year) ), title="Transparency Score", @@ -175,7 +177,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score_over_time_details( + data=algo.transparency_scores_over_time_details( state.data, state.selected_company ), title="Transparency score over time ", @@ -187,7 +189,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_company_key_financials_kpis( + data=algo.company_key_financials_kpis( state.data, state.selected_company, int(state.selected_year) ), title="Key metrics", @@ -195,11 +197,11 @@ def update_viz_year(state: State): ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - id = "fin_jurisdictions_top_revenue" + id = "fin_top_jurisdictions_revenue" state.viz[id] = Viz( id=id, state=state, - fig=algo.display_jurisdictions_top_revenue( + fig=algo.top_jurisdictions_revenue( state.data, state.selected_company, int(state.selected_year) ), title="Distribution of revenues across countries", @@ -212,7 +214,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - fig=algo.display_pretax_profit_and_employees_rank( + fig=algo.pretax_profit_and_employees_rank( state.data, state.selected_company, int(state.selected_year) ), title="% profit and employees by country", @@ -224,7 +226,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - fig=algo.display_pretax_profit_and_profit_per_employee( + fig=algo.pretax_profit_and_profit_per_employee( state.data, state.selected_company, int(state.selected_year) ), title="% profit and profit / employee by country", diff --git a/app/pages/home/home.md b/app/pages/home/home.md index 0177c81..8ac7581 100644 --- a/app/pages/home/home.md +++ b/app/pages/home/home.md @@ -108,10 +108,10 @@ Our database is growing |> <|part|class_name=viz-container| -<|{viz["general_number_of_tracked_mnc_available"].title}|text|class_name=text-weight400|> +<|{viz["general_list_of_tracked_mnc_available"].title}|text|class_name=text-weight400|>
-<|{viz["general_number_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|> -<|chart|figure={viz["general_number_of_tracked_mnc_available"].fig}|> +<|{viz["general_list_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|> +<|chart|figure={viz["general_list_of_tracked_mnc_available"].fig}|> |> |> diff --git a/app/pages/home/home.py b/app/pages/home/home.py index 3188cea..baeac96 100644 --- a/app/pages/home/home.py +++ b/app/pages/home/home.py @@ -29,7 +29,7 @@ def on_init(state: State): "general_number_of_tracked_reports", "general_number_of_tracked_reports_over_time", "general_number_of_tracked_mnc", - "general_number_of_tracked_mnc_available", + "general_list_of_tracked_mnc_available", ) ) @@ -50,8 +50,7 @@ def update_viz(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.number_of_tracked_reports_over_time(state.data), - fig=algo.display_number_of_tracked_reports_over_time(state.data), + fig=algo.number_of_tracked_reports_over_time(state.data), title="Number of reports over time", ).to_state() @@ -64,12 +63,11 @@ def update_viz(state: State): sub_title="with 1+ report tracked", ).to_state() - id = "general_number_of_tracked_mnc_available" + id = "general_list_of_tracked_mnc_available" state.viz[id] = Viz( id=id, state=state, - data=algo.compute_number_of_tracked_mnc_available(state.data), - fig=algo.display_number_of_tracked_mnc_available(state.data), + fig=algo.mnc_tracked(state.data), title="Multinationals available", sub_title="with 1+ report tracked", ).to_state()