From f6ecfdbd920d93f6e6d0b795d7a9f4aef96d27ac Mon Sep 17 00:00:00 2001 From: anquetos Date: Mon, 17 Jun 2024 14:19:49 +0200 Subject: [PATCH 1/6] Apply Ruff --- app/algo.py | 928 ++++++++++++++++++++++++++-------------------------- 1 file changed, 465 insertions(+), 463 deletions(-) diff --git a/app/algo.py b/app/algo.py index 6f9354c..5ca4795 100644 --- a/app/algo.py +++ b/app/algo.py @@ -8,6 +8,7 @@ # Define color sequence for plots COLOR_SEQUENCE = ["#D9D9D9", "#1E2E5C"] + # TODO add viz comment # Viz 1 - def number_of_tracked_reports(df): @@ -17,9 +18,7 @@ def number_of_tracked_reports(df): # TODO add viz comment def number_of_tracked_reports_company(df_selected_company): - number_of_tracked_reports_company = len( - df_selected_company.groupby(["year"])["year"] - ) + number_of_tracked_reports_company = len(df_selected_company.groupby(["year"])["year"]) return number_of_tracked_reports_company @@ -29,9 +28,7 @@ def number_of_tracked_mnc(df: pd.DataFrame) -> int: # TODO add viz comment def number_of_tracked_reports_sector(df_selected_sector): - number_of_tracked_reports_sector = len( - df_selected_sector.groupby(["year", "mnc"])["year"] - ) + number_of_tracked_reports_sector = len(df_selected_sector.groupby(["year", "mnc"])["year"]) return number_of_tracked_reports_sector @@ -55,38 +52,27 @@ def display_number_of_tracked_reports_over_time(df): data = number_of_tracked_reports_over_time(df=df) # Bar color sequence - bar_color = '#D9D9D9' + bar_color = "#D9D9D9" # Create figure - fig = px.bar( - data, - x='year', - y='mnc', - color_discrete_sequence=[bar_color], - text_auto=True - ) + fig = px.bar(data, x="year", y="mnc", color_discrete_sequence=[bar_color], text_auto=True) # Force position and color of bar values - fig.update_traces( - textposition='outside', textfont=dict(color='black') - ) + fig.update_traces(textposition="outside", textfont=dict(color="black")) # Update layout settings fig.update_layout( autosize=True, height=360, - font_family='Roboto', + font_family="Roboto", title=None, - xaxis=dict( - title=None, - tickvals=data['year'].unique() - ), + xaxis=dict(title=None, tickvals=data["year"].unique()), yaxis=dict( title=None, visible=False, ), - plot_bgcolor='white', - margin=dict(l=0, r=0, b=0, t=0) + plot_bgcolor="white", + margin=dict(l=0, r=0, b=0, t=0), ) # Define style of hover on bars @@ -99,9 +85,7 @@ def display_number_of_tracked_reports_over_time(df): # TODO add viz comment def number_of_tracked_reports_over_time_company(df_selected_company): - df_count_company = ( - df_selected_company.groupby(["year"])["mnc"].nunique().reset_index() - ) + df_count_company = df_selected_company.groupby(["year"])["mnc"].nunique().reset_index() # df_count_all_company = df.groupby(["year"])["mnc"].nunique().reset_index() # row[3].line_chart(df_count_all_company, x="year", y="mnc") @@ -115,9 +99,7 @@ def number_of_tracked_reports_over_time_company(df_selected_company): # TODO add viz comment def number_of_tracked_reports_over_time_sector(df_selected_sector): - df_count_sector = ( - df_selected_sector.groupby(["year"])["mnc"].nunique().reset_index() - ) + df_count_sector = df_selected_sector.groupby(["year"])["mnc"].nunique().reset_index() # df_count_all_sector = ( # df.groupby(["year", "sector"])["mnc"].nunique().reset_index() @@ -134,9 +116,7 @@ def number_of_tracked_reports_over_time_sector(df_selected_sector): # TODO add viz comment def number_of_tracked_reports_over_time_country(df_selected_country): - df_count_country = ( - df_selected_country.groupby(["year"])["mnc"].nunique().reset_index() - ) + df_count_country = df_selected_country.groupby(["year"])["mnc"].nunique().reset_index() # df_count_all_country = ( # df.groupby(["year", "jur_name"])["mnc"].nunique().reset_index() # ) @@ -147,14 +127,15 @@ def number_of_tracked_reports_over_time_country(df_selected_country): # Viz 16 + # company’s % pre-tax profit and profit per employee # plot chart : x-axis = % profit, y axis = profit / employee # size of the bubble based on % profit and a color code for # tax havens vs others def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_company): # pretax_profit_col_name = 'profit_before_tax' - profit_col_name = '' - employee_col_name = 'employees' + profit_col_name = "" + employee_col_name = "employees" df_selected_company[profit_col_name] / df_selected_company[employee_col_name] @@ -167,57 +148,50 @@ def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_compan # % related party revenue # for domestic vs tax havens vs. non havens def tax_haven_used_by_company(df_selected_company): - company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = ['employees', 'profit_before_tax', 'related_revenues'] + company_upe_code = df_selected_company["upe_code"].unique()[0] + pc_list = ["employees", "profit_before_tax", "related_revenues"] # grouper = df_selected_company.groupby('jur_name') - + df = pd.DataFrame(df_selected_company) - - df_domestic_company = df[df['jur_code'] == company_upe_code] - df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH'] - df_selected_company_nth = df[df['jur_tax_haven'] == 'not.TH'] - + + df_domestic_company = df[df["jur_code"] == company_upe_code] + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_nth = df[df["jur_tax_haven"] == "not.TH"] + for col in pc_list: - df.insert( len(df_selected_company.columns), - col + '_domestic_sum', - df_domestic_company[col].sum()) + col + "_domestic_sum", + df_domestic_company[col].sum(), + ) df.insert( - len(df_selected_company.columns), - col + '_th_sum', - df_selected_company_th[col].sum()) + len(df_selected_company.columns), col + "_th_sum", df_selected_company_th[col].sum() + ) - df.insert( - len(df.columns), - col + '_nth_sum', - df_selected_company_nth[col].sum()) + df.insert(len(df.columns), col + "_nth_sum", df_selected_company_nth[col].sum()) - df.insert( - len(df.columns), - col + '_sum', - df_selected_company[col].sum()) + df.insert(len(df.columns), col + "_sum", df_selected_company[col].sum()) - df.insert( - len(df.columns), - col + '_pc', - 100 * df[col] / df[col + '_sum']) + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum'] - df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH'] - df_selected_company_th_agg = df_selected_company_th.groupby(['mnc', 'jur_name']).agg( - profit_before_tax=('profit_before_tax', 'sum'), - profit_before_tax_pc=('profit_before_tax_pc', 'sum'), - employees_pc=('employees_pc', 'sum'), - employees=('employees', 'sum'), - related_revenues_pc=('related_revenues_pc', 'sum') + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_th_agg = df_selected_company_th.groupby(["mnc", "jur_name"]).agg( + profit_before_tax=("profit_before_tax", "sum"), + profit_before_tax_pc=("profit_before_tax_pc", "sum"), + employees_pc=("employees_pc", "sum"), + employees=("employees", "sum"), + related_revenues_pc=("related_revenues_pc", "sum"), ) df_selected_company_th_agg = df_selected_company_th_agg.reset_index() - df_selected_company_th_agg['profit per employee'] = \ - df_selected_company_th_agg['profit_before_tax'] / df_selected_company_th_agg['employees'] - df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace( - [np.inf, -np.inf], None) + df_selected_company_th_agg["profit per employee"] = ( + df_selected_company_th_agg["profit_before_tax"] + / df_selected_company_th_agg["employees"] + ) + df_selected_company_th_agg["profit per employee"] = df_selected_company_th_agg[ + "profit per employee" + ].replace([np.inf, -np.inf], None) return df_selected_company, df_selected_company_th_agg @@ -226,73 +200,86 @@ def tax_haven_used_by_company(df_selected_company): # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) def company_table(df_selected_company): # company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', 'related_revenues', 'total_revenues', 'tax_paid'] - + pc_list = [ + "employees", + "profit_before_tax", + "unrelated_revenues", + "related_revenues", + "total_revenues", + "tax_paid", + ] + df = pd.DataFrame(df_selected_company) for col in pc_list: - if col + '_sum' not in df.columns: - df.insert( - len(df.columns), - col + '_sum', - df[col].sum()) - - df.insert( - len(df.columns), - col + '_pc', - 100 * df[col] / df[col + '_sum']) + if col + "_sum" not in df.columns: + df.insert(len(df.columns), col + "_sum", df[col].sum()) + + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) # f_selected_company[col + '_sum'] = df_selected_company[col].sum() # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum'] # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) - df_selected_company_by_jur = df.groupby(['mnc', 'jur_name']).agg( - related_revenues_pc=('related_revenues_pc', 'sum'), - unrelated_revenues=('unrelated_revenues', 'sum'), - total_revenues=('total_revenues', 'sum'), - profit_before_tax=('profit_before_tax', 'sum'), - employees_pc=('employees_pc', 'sum'), - tax_paid=('tax_paid', 'sum'), - tax_paid_pc=('tax_paid_pc', 'sum'), + df_selected_company_by_jur = df.groupby(["mnc", "jur_name"]).agg( + related_revenues_pc=("related_revenues_pc", "sum"), + unrelated_revenues=("unrelated_revenues", "sum"), + total_revenues=("total_revenues", "sum"), + profit_before_tax=("profit_before_tax", "sum"), + employees_pc=("employees_pc", "sum"), + tax_paid=("tax_paid", "sum"), + tax_paid_pc=("tax_paid_pc", "sum"), ) return df_selected_company_by_jur.reset_index() # Viz 4 - Breakdown of reports by sector (pie chart) def breakdown_of_reports_by_sector(df): - #Dataframe called df - df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index( - name='unique_company_count') + # Dataframe called df + df_reports_per_sector_year = ( + df.groupby(["sector", "year"])["mnc"].nunique().reset_index(name="unique_company_count") + ) # Aggregate the counts of unique companies across all years for each sector - df_reports_per_sector = df_reports_per_sector_year.groupby('sector')['unique_company_count'].sum().reset_index() + df_reports_per_sector = ( + df_reports_per_sector_year.groupby("sector")["unique_company_count"].sum().reset_index() + ) # Calculate the total count of unique companies across all sectors - total_companies = df_reports_per_sector['unique_company_count'].sum() + total_companies = df_reports_per_sector["unique_company_count"].sum() # Calculate the percentage of each sector's count relative to the total count and round to 2 decimals - df_reports_per_sector['percent'] = ((df_reports_per_sector['unique_company_count'] / total_companies) * 100).round( - 2) + df_reports_per_sector["percent"] = ( + (df_reports_per_sector["unique_company_count"] / total_companies) * 100 + ).round(2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_sector = df_reports_per_sector.sort_values(by='unique_company_count', ascending=True) + df_reports_per_sector = df_reports_per_sector.sort_values( + by="unique_company_count", ascending=True + ) return df_reports_per_sector def breakdown_of_reports_by_sector_viz(df_reports_per_sector): # Plotting the horizontal bar chart with Plotly Express - fig = px.bar(df_reports_per_sector, y='sector', x='percent', - orientation='h', # Horizontal orientation - title='Breakdown of Reports by Sector (All Years)', - labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'}, - text='percent', # Show the percentage as text label - hover_data={'unique_company_count': True, 'percent': ':.2f%'}, - # Add tooltip for count and rounded percentage - ) + fig = px.bar( + df_reports_per_sector, + y="sector", + x="percent", + orientation="h", # Horizontal orientation + title="Breakdown of Reports by Sector (All Years)", + labels={"percent": "Percentage of Companies (%)", "sector": "Sector"}, + text="percent", # Show the percentage as text label + hover_data={"unique_company_count": True, "percent": ":.2f%"}, + # Add tooltip for count and rounded percentage + ) # Update layout to display the title above the chart - fig.update_layout(title='Breakdown of Reports by Sector', - title_x=0.5, title_y=0.9, # Adjust position - title_font_size=20) # Adjust font size + fig.update_layout( + title="Breakdown of Reports by Sector", + title_x=0.5, + title_y=0.9, # Adjust position + title_font_size=20, + ) # Adjust font size # Show the horizontal bar chart return go.Figure(fig) @@ -301,40 +288,56 @@ def breakdown_of_reports_by_sector_viz(df_reports_per_sector): # Viz 5 - Breakdown of reports by HQ country (pie chart) def breakdown_of_reports_by_hq_country(df): # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ country and year - df_reports_per_country_year = df.groupby(['upe_name', 'year'])['mnc'].nunique().reset_index( - name='unique_company_count') + df_reports_per_country_year = ( + df.groupby(["upe_name", "year"])["mnc"] + .nunique() + .reset_index(name="unique_company_count") + ) # Aggregate the counts of unique companies across all years for each HQ country - df_reports_per_country = df_reports_per_country_year.groupby('upe_name')['unique_company_count'].sum().reset_index() + df_reports_per_country = ( + df_reports_per_country_year.groupby("upe_name")["unique_company_count"] + .sum() + .reset_index() + ) # Calculate the total count of unique companies across all HQ countries - total_companies = df_reports_per_country['unique_company_count'].sum() + total_companies = df_reports_per_country["unique_company_count"].sum() # Calculate the percentage of each HQ country's count relative to the total count and round to 2 decimals - df_reports_per_country['percent'] = ( - (df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2) + df_reports_per_country["percent"] = ( + (df_reports_per_country["unique_company_count"] / total_companies) * 100 + ).round(2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_country = df_reports_per_country.sort_values(by='unique_company_count', ascending=True) + df_reports_per_country = df_reports_per_country.sort_values( + by="unique_company_count", ascending=True + ) return df_reports_per_country def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): # Plotting the horizontal bar chart with Plotly Express - fig = px.bar(df_reports_per_country, y='upe_name', x='percent', - orientation='h', # Horizontal orientation - title='Breakdown of Reports by HQ Country over Time', - labels={'percent': 'Percentage of Companies (%)', 'upe_name': 'HQ Country'}, - text='percent', # Show the percentage as text label - hover_data={'unique_company_count': True, 'percent': ':.2f%'}, - # Add tooltip for count and rounded percentage - ) + fig = px.bar( + df_reports_per_country, + y="upe_name", + x="percent", + orientation="h", # Horizontal orientation + title="Breakdown of Reports by HQ Country over Time", + labels={"percent": "Percentage of Companies (%)", "upe_name": "HQ Country"}, + text="percent", # Show the percentage as text label + hover_data={"unique_company_count": True, "percent": ":.2f%"}, + # Add tooltip for count and rounded percentage + ) # Update layout to display the title above the chart - fig.update_layout(title='Breakdown of Reports by HQ Country over Time', - title_x=0.5, title_y=0.95, # Adjust position - title_font_size=20) # Adjust font size + fig.update_layout( + title="Breakdown of Reports by HQ Country over Time", + title_x=0.5, + title_y=0.95, # Adjust position + title_font_size=20, + ) # Adjust font size # Show the horizontal bar chart # fig.show() @@ -349,42 +352,52 @@ def breakdown_of_reports_by_sector_over_time(df): # return df_reports_per_sector_over_time # Step 1: Determine the top 10 sectors that released reports - top_10_sectors = df['sector'].value_counts().nlargest(10).index.tolist() + top_10_sectors = df["sector"].value_counts().nlargest(10).index.tolist() # Step 2: Group all other sectors as "Others" - df['Sectors'] = df['sector'].apply(lambda x: x if x in top_10_sectors else 'Others') + df["Sectors"] = df["sector"].apply(lambda x: x if x in top_10_sectors else "Others") # Step 3: Group the DataFrame by 'year', 'Sectors', and count the number of unique companies for each year and sector - df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index( - name='unique_company_count') + df_reports_per_year_sector = ( + df.groupby(["year", "Sectors"])["mnc"] + .nunique() + .reset_index(name="unique_company_count") + ) # Sort sectors alphabetically - df_reports_per_year_sector = df_reports_per_year_sector.sort_values(by='Sectors', ascending=False) + df_reports_per_year_sector = df_reports_per_year_sector.sort_values( + by="Sectors", ascending=False + ) return df_reports_per_year_sector, top_10_sectors def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors): # Define the order of sectors for the stacked bar chart and legend, reversed - chart_order = ['Others'] + top_10_sectors[::-1] - legend_order = ['Others'] + top_10_sectors[::-1] + chart_order = ["Others"] + top_10_sectors[::-1] + legend_order = ["Others"] + top_10_sectors[::-1] # Plotting the bar chart using Plotly Express - fig = px.bar(df_reports_per_year_sector, x='year', y='unique_company_count', color='Sectors', - title='Breakdown of Reports by Sector over Time', - labels={'unique_company_count': 'Number of Companies Reporting', 'year': 'Year'}, - barmode='stack', - category_orders={'Sectors': chart_order}) + fig = px.bar( + df_reports_per_year_sector, + x="year", + y="unique_company_count", + color="Sectors", + title="Breakdown of Reports by Sector over Time", + labels={"unique_company_count": "Number of Companies Reporting", "year": "Year"}, + barmode="stack", + category_orders={"Sectors": chart_order}, + ) # Reverse the order of legend items - fig.update_layout(legend=dict(traceorder='reversed')) + fig.update_layout(legend=dict(traceorder="reversed")) # Adjusting the legend order and formatting the legend labels for i, trace in enumerate(fig.data): trace.name = legend_order[i] # Change color of the "Others" bar to grey - if trace.name == 'Others': - trace.marker.color = 'grey' + if trace.name == "Others": + trace.marker.color = "grey" # Show the plot # fig.show() @@ -406,6 +419,7 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top ## Viz 11 - Breakdown of MNC by HQ country # TODO add code + # Viz 12 - available reports by company def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: """Compute the number of reports tracked for a specific company and the @@ -418,7 +432,7 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: Returns: dict: numbers of reports and fiscal years. """ - available_years = df.loc[df['mnc'] == company, 'year'].unique() + available_years = df.loc[df["mnc"] == company, "year"].unique() n_reports = len(available_years) # Convert type of items from 'int' to 'str' in available years list @@ -428,21 +442,18 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: if len(years_string_list) == 1: years_string = years_string_list[0] elif len(years_string_list) > 1: - years_string = ', '.join(years_string_list[:-1]) - years_string += ' and ' + years_string_list[-1] + years_string = ", ".join(years_string_list[:-1]) + years_string += " and " + years_string_list[-1] # Create a dictionnary with the results - data = { - 'Company': company, - 'Reports': n_reports, - 'Fiscal year(s) available': years_string - } + data = {"Company": company, "Reports": n_reports, "Fiscal year(s) available": years_string} return data def display_company_available_reports( - df: pd.DataFrame, company: str, hide_company: bool = True) -> pd.DataFrame: + df: pd.DataFrame, company: str, hide_company: bool = True +) -> pd.DataFrame: """Display the number of reports tracked for a specific company and the available fiscal years. @@ -459,19 +470,18 @@ def display_company_available_reports( data = compute_company_available_reports(df=df, company=company) # Create the table - df = pd.DataFrame.from_dict(data=data, orient='index') + df = pd.DataFrame.from_dict(data=data, orient="index") if hide_company: - return df[1:].style.hide(axis='columns') + return df[1:].style.hide(axis="columns") - return df.style.hide(axis='columns') + return df.style.hide(axis="columns") # Viz 13 - company key financials kpis def compute_company_key_financials_kpis( - df: pd.DataFrame, - company: str, - year: int = None) -> dict: + df: pd.DataFrame, company: str, year: int = None +) -> dict: """Compute key financial KPIs for a company. Args: @@ -483,47 +493,56 @@ def compute_company_key_financials_kpis( dict: company key financial KPIs. """ - kpis_list = ['total_revenues', 'unrelated_revenues', 'related_revenues', - 'profit_before_tax', 'tax_paid', 'employees'] + kpis_list = [ + "total_revenues", + "unrelated_revenues", + "related_revenues", + "profit_before_tax", + "tax_paid", + "employees", + ] - years_list = df.loc[df['mnc'] == company, 'year'].unique() + years_list = df.loc[df["mnc"] == company, "year"].unique() # Compute sum of kpis if not year or year not in years_list: - df = (df.loc[df['mnc'] == company] - .groupby(['year', 'upe_name'], as_index=False)[kpis_list] - .sum() - ) + df = ( + df.loc[df["mnc"] == company] + .groupby(["year", "upe_name"], as_index=False)[kpis_list] + .sum() + ) else: - df = (df.loc[(df['mnc'] == company) & (df['year'] == year)] - .groupby(['year', 'upe_name'], as_index=False)[kpis_list] - .sum()) + df = ( + df.loc[(df["mnc"] == company) & (df["year"] == year)] + .groupby(["year", "upe_name"], as_index=False)[kpis_list] + .sum() + ) # df = df.set_index('year') # Make financial numbers easily readable with 'humanize' package for column in df.columns: - if column not in ['employees', 'upe_name']: + if column not in ["employees", "upe_name"]: df[column] = df[column].apply( - lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x) - df[column] = '€ ' + df[column] - elif column == 'employees': + lambda x: humanize.intword(x) if isinstance(x, (int, float)) else x + ) + df[column] = "€ " + df[column] + elif column == "employees": df[column] = df[column].astype(int) # Remove 'upe_name' and 'year'' - df = df.drop(columns=['upe_name', 'year']) + df = df.drop(columns=["upe_name", "year"]) # Clean columns string - df.columns = df.columns.str.replace('_', ' ').str.capitalize() + df.columns = df.columns.str.replace("_", " ").str.capitalize() # Create a dictionary with the results - data = df.to_dict(orient='index') + data = df.to_dict(orient="index") return data -def display_company_key_financials_kpis( - df: pd.DataFrame, company: str, year: int = None): +def display_company_key_financials_kpis(df: pd.DataFrame, company: str, year: int = None): """Display key financial KPIs for a company. Args: @@ -543,17 +562,16 @@ def display_company_key_financials_kpis( df = df.reset_index() # Rename columns - df = df.rename(columns={'index': 'Variable', 0: 'Value'}) + df = df.rename(columns={"index": "Variable", 0: "Value"}) # Replace 0 values with 'N/A' - df.loc[df['Value'] == '€ 0', 'Value'] = 'N/A' + df.loc[df["Value"] == "€ 0", "Value"] = "N/A" return df # Viz 14 -def compute_top_jurisdictions_revenue( - df: pd.DataFrame, company: str, year: int) -> dict: +def compute_top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> dict: """Rank jurisdictions on their percentage of total revenues. Args: @@ -566,27 +584,27 @@ def compute_top_jurisdictions_revenue( """ df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['jur_name', 'related_revenues', 'unrelated_revenues', 'total_revenues'] + (df["mnc"] == company) & (df["year"] == year), + ["jur_name", "related_revenues", "unrelated_revenues", "total_revenues"], ] # Calculate missing values in 'total_revenues' if 'related_revenues' and # 'unrelated_revenues' are available df.loc[ - df['related_revenues'].notna() - & df['unrelated_revenues'].notna() - & df['total_revenues'].isna(), - 'total_revenues' - ] = df['related_revenues'] + df['unrelated_revenues'] + df["related_revenues"].notna() + & df["unrelated_revenues"].notna() + & df["total_revenues"].isna(), + "total_revenues", + ] = df["related_revenues"] + df["unrelated_revenues"] # Subset DataFrame - df = df[['jur_name', 'total_revenues']] + df = df[["jur_name", "total_revenues"]] # Remove rows where 'total_revenues' is missing - df = df.dropna(subset=['total_revenues']) + df = df.dropna(subset=["total_revenues"]) # Compute percentage of revenue - df['total_revenues_%'] = df['total_revenues'] / df['total_revenues'].sum() + df["total_revenues_%"] = df["total_revenues"] / df["total_revenues"].sum() # Convert DataFrame to dictionnary data = df.to_dict() @@ -609,53 +627,45 @@ def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int) # Create DataFrame df = pd.DataFrame.from_dict(data) - df = df.sort_values(by='total_revenues_%') + df = df.sort_values(by="total_revenues_%") # Bar color sequence - bar_color = '#D9D9D9' + bar_color = "#D9D9D9" # Create figure fig = px.bar( df, - x='total_revenues_%', - y='jur_name', - orientation='h', + x="total_revenues_%", + y="jur_name", + orientation="h", color_discrete_sequence=[bar_color], - text_auto='.1%' + text_auto=".1%", ) # Set figure height (min. 480) depending on the number of jurisdictions - fig_height = max(480, (48 * len(df['jur_name']))) + fig_height = max(480, (48 * len(df["jur_name"]))) # Update layout settings fig.update_layout( - font_family='Roboto', - xaxis=dict( - title='Percentage of total revenue', - tickformat='.0%' - ), + font_family="Roboto", + xaxis=dict(title="Percentage of total revenue", tickformat=".0%"), yaxis_title=None, - plot_bgcolor='white', + plot_bgcolor="white", height=fig_height, - margin=dict(l=0, r=0, t=0, b=0) + margin=dict(l=0, r=0, t=0, b=0), ) # Define position of text values values_positions = [ - 'outside' if value <= 0.05 else 'inside' for value in df['total_revenues_%']] + "outside" if value <= 0.05 else "inside" for value in df["total_revenues_%"] + ] - fig.update_traces( - textangle=0, - textposition=values_positions, - selector=dict(name='') - ) + fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name="")) # Define style of hover on bars fig.update_traces( - hovertemplate=( - "%{hovertext}

% revenue: %{x:.3%}
" - ), - hovertext=df['jur_name'] + hovertemplate=("%{hovertext}

% revenue: %{x:.3%}
"), + hovertext=df["jur_name"], ) return go.Figure(fig) @@ -663,7 +673,8 @@ def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int) # Viz 15 def compute_pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int) -> pd.DataFrame: + df: pd.DataFrame, company: str, year: int +) -> pd.DataFrame: """Compute jurisdictions percentage of profit before tax and percentage of employees and rank by percentage of profit. @@ -678,19 +689,19 @@ def compute_pretax_profit_and_employees_rank( """ # Filter rows with selected company/year and subset with necessary features - features = ['jur_name', 'profit_before_tax', 'employees'] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + features = ["jur_name", "profit_before_tax", "employees"] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Keep only profitable jurisdictions - df = df.loc[df['profit_before_tax'] >= 0] + df = df.loc[df["profit_before_tax"] >= 0] # Sort jurisdictions by profits - df = df.sort_values(by='profit_before_tax').reset_index(drop=True) + df = df.sort_values(by="profit_before_tax").reset_index(drop=True) # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() - df['employees_%'] = df['employees'] / df['employees'].sum() - df = df.drop(columns=['profit_before_tax', 'employees']) + df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum() + df["employees_%"] = df["employees"] / df["employees"].sum() + df = df.drop(columns=["profit_before_tax", "employees"]) # data = df.to_dict() @@ -698,7 +709,8 @@ def compute_pretax_profit_and_employees_rank( def display_pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int) -> go.Figure: + df: pd.DataFrame, company: str, year: int +) -> go.Figure: """Display rank of jurisdictions by percentage of profit before and percentage of employees. @@ -715,114 +727,103 @@ def display_pretax_profit_and_employees_rank( # df = pd.DataFrame(data) # Rename columns - df = df.rename(columns={ - 'profit_before_tax_%': '% profit', - 'employees_%': '% employees' - }) + df = df.rename(columns={"profit_before_tax_%": "% profit", "employees_%": "% employees"}) # Bar color sequence - bar_colors = ['#D9D9D9', '#1E2E5C'] + bar_colors = ["#D9D9D9", "#1E2E5C"] # Create figure fig = px.bar( df, - x=['% employees', '% profit'], - y='jur_name', - barmode='group', - orientation='h', - text_auto='.1%', - color_discrete_sequence=bar_colors + x=["% employees", "% profit"], + y="jur_name", + barmode="group", + orientation="h", + text_auto=".1%", + color_discrete_sequence=bar_colors, ) # Set figure height (min. 640) depending on the number of jurisdictions - fig_height = max(480, (48 * len(df['jur_name']))) + fig_height = max(480, (48 * len(df["jur_name"]))) # Set maximum value for x axis - if not df[['% profit', '% employees']].isna().all().all(): - max_x_value = max(df[['% profit', '% employees']].max(axis='columns')) + 0.1 + if not df[["% profit", "% employees"]].isna().all().all(): + max_x_value = max(df[["% profit", "% employees"]].max(axis="columns")) + 0.1 else: max_x_value = 1 # Update layout settings fig.update_layout( - font_family='Roboto', + font_family="Roboto", title=None, - xaxis=dict( - title=None, - tickformat='.0%', - range=[0, max_x_value] - ), + xaxis=dict(title=None, tickformat=".0%", range=[0, max_x_value]), yaxis_title=None, legend=dict( - x=0.1, - y=1.05, - xanchor='center', - yanchor='top', - title=dict(text=''), - orientation='h' + x=0.1, y=1.05, xanchor="center", yanchor="top", title=dict(text=""), orientation="h" ), - plot_bgcolor='white', + plot_bgcolor="white", height=fig_height, - margin=dict(l=0, r=0, t=10, b=0) + margin=dict(l=0, r=0, t=10, b=0), ) # Add annotations for NaN values where there should have been a bar for index, row in df.iterrows(): - if pd.isna(row['% employees']): + if pd.isna(row["% employees"]): fig.add_annotation( - xanchor='left', + xanchor="left", x=0.001, y=df.index[index], yshift=-10, - text='Information not provided', + text="Information not provided", showarrow=False, - font=dict(size=12) + font=dict(size=12), ) - if pd.isna(row['% profit']): + if pd.isna(row["% profit"]): fig.add_annotation( - xanchor='left', + xanchor="left", x=0.001, y=df.index[index], yshift=10, - text='Information not provided', + text="Information not provided", showarrow=False, - font=dict(size=12) + font=dict(size=12), ) # Loop through each bar trace and hide the text if the value is NaN for trace in fig.data: values = df[trace.name] - text_position = ['outside' if not np.isnan(value) else 'none' for value in values] + text_position = ["outside" if not np.isnan(value) else "none" for value in values] trace.textposition = text_position - if trace.name == '% employees': - trace.hovertemplate = '%{y}

Employees : %{x:.3%}' - elif trace.name == '% profit': - trace.hovertemplate = '%{y}

Profit : %{x:.3%}' + if trace.name == "% employees": + trace.hovertemplate = "%{y}

Employees : %{x:.3%}" + elif trace.name == "% profit": + trace.hovertemplate = "%{y}

Profit : %{x:.3%}" return go.Figure(fig) # Viz 16 def compute_pretax_profit_and_profit_per_employee( - df: pd.DataFrame, company: str, year: int) -> pd.DataFrame: + df: pd.DataFrame, company: str, year: int +) -> pd.DataFrame: # Filter rows with selected company/year and subset with necessary features - features = ['jur_name', 'profit_before_tax', 'employees', 'jur_tax_haven'] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + features = ["jur_name", "profit_before_tax", "employees", "jur_tax_haven"] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Keep only profitable jurisdictions - df = df.loc[df['profit_before_tax'] >= 0] + df = df.loc[df["profit_before_tax"] >= 0] # Sort jurisdictions by profits - df = df.sort_values(by='profit_before_tax').reset_index(drop=True) + df = df.sort_values(by="profit_before_tax").reset_index(drop=True) # Replace 0 employees by 1 - df.loc[df['employees'] == 0, 'employees'] = 1 + df.loc[df["employees"] == 0, "employees"] = 1 # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() - df['profit_per_employee'] = df['profit_before_tax'] / df['employees'] - df = df.drop(columns=['profit_before_tax', 'employees']) + df["profit_before_tax_%"] = df["profit_before_tax"] / df["profit_before_tax"].sum() + df["profit_per_employee"] = df["profit_before_tax"] / df["employees"] + df = df.drop(columns=["profit_before_tax", "employees"]) # print('compute_pretax_profit_and_profit_per_employee df.head():\n', df.head()) # data = df.to_dict() @@ -830,64 +831,63 @@ def compute_pretax_profit_and_profit_per_employee( return df -def display_pretax_profit_and_profit_per_employee(df: pd.DataFrame, company: str, year: int) -> go.Figure: +def display_pretax_profit_and_profit_per_employee( + df: pd.DataFrame, company: str, year: int +) -> go.Figure: # Compute data df = compute_pretax_profit_and_profit_per_employee(df=df, company=company, year=year) - + # Create DataFrame # df = pd.DataFrame(data) # Replace bool values of Tax haven by string values - df['jur_tax_haven'] = df['jur_tax_haven'].map({True: 'Tax haven', False: 'Non tax haven'}) - + df["jur_tax_haven"] = df["jur_tax_haven"].map({True: "Tax haven", False: "Non tax haven"}) + # Create figure fig = px.scatter( df, - x='profit_before_tax_%', - y='profit_per_employee', - size='profit_before_tax_%', - color='jur_tax_haven', + x="profit_before_tax_%", + y="profit_per_employee", + size="profit_before_tax_%", + color="jur_tax_haven", color_discrete_sequence=COLOR_SEQUENCE, - custom_data=['jur_name'] + custom_data=["jur_name"], ) # Update layout settings fig.update_layout( title=None, - font_family='Roboto', + font_family="Roboto", autosize=True, height=360, xaxis=dict( - title='% profit', - tickformat='.0%', + title="% profit", + tickformat=".0%", ), yaxis=dict( - title='Profit/employee', + title="Profit/employee", ), legend=dict( - x=0.1, - y=1.05, - xanchor='center', - yanchor='top', - title=dict(text=''), - orientation='h'), - plot_bgcolor='white', - margin=dict(l=0, r=0, t=0, b=0) + x=0.1, y=1.05, xanchor="center", yanchor="top", title=dict(text=""), orientation="h" + ), + plot_bgcolor="white", + margin=dict(l=0, r=0, t=0, b=0), ) - - + # Define hover fig.update_traces( hovertemplate=f"{company} reports %{{x:.1%}} of profit and %{{y:.3s}}€ profits per employee in %{{customdata[0]}}" ) - + return go.Figure(fig) # Viz 18 + def compute_related_and_unrelated_revenues_breakdown( - df: pd.DataFrame, company: str, year: int) -> dict: + df: pd.DataFrame, company: str, year: int +) -> dict: """Compute related and unrelated revenues in tax heaven, non tax heaven and domestic jurisdictions. @@ -901,35 +901,49 @@ def compute_related_and_unrelated_revenues_breakdown( """ # Filter rows with selected company/year and subset with necessary features - features = ['upe_code', 'jur_code', 'jur_name', 'jur_tax_haven', - 'unrelated_revenues', 'related_revenues'] + features = [ + "upe_code", + "jur_code", + "jur_name", + "jur_tax_haven", + "unrelated_revenues", + "related_revenues", + ] - df = df.loc[(df['mnc'] == company) & (df['year'] == year), features] + df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] # Drop rows where either unrelated or related revenues are missing - df = df.dropna(subset=['unrelated_revenues', 'related_revenues']) + df = df.dropna(subset=["unrelated_revenues", "related_revenues"]) # 'total_revenues' is recreated using related and unrelated revenues since the one # reported by companies is not always reliable - df['total_revenues'] = df['unrelated_revenues'] + df['related_revenues'] + df["total_revenues"] = df["unrelated_revenues"] + df["related_revenues"] # Create a column to check if 'jur_code' is the domestic country - df['domestic'] = df.apply(lambda row: row['jur_code'] == row['upe_code'], axis='columns') + df["domestic"] = df.apply(lambda row: row["jur_code"] == row["upe_code"], axis="columns") # Compute kpis in a new DataFrame data = pd.DataFrame() - data['tax_haven'] = df.loc[df['jur_tax_haven'] == True, ['unrelated_revenues', 'related_revenues']].sum() - data['non_tax_haven'] = df.loc[df['jur_tax_haven'] == False, ['unrelated_revenues', 'related_revenues']].sum() - data['domestic'] = df.loc[df['domestic'] == True, ['unrelated_revenues', 'related_revenues']].sum() + data["tax_haven"] = df.loc[ + df["jur_tax_haven"] == True, ["unrelated_revenues", "related_revenues"] + ].sum() + data["non_tax_haven"] = df.loc[ + df["jur_tax_haven"] == False, ["unrelated_revenues", "related_revenues"] + ].sum() + data["domestic"] = df.loc[ + df["domestic"] == True, ["unrelated_revenues", "related_revenues"] + ].sum() # Replace values with share (%) of 'unrelated/related revenues' - data = data.div(data.sum(axis='rows'), axis='columns') + data = data.div(data.sum(axis="rows"), axis="columns") # Rename indexes - data = data.rename(index={ - 'unrelated_revenues': 'unrelated_revenues_percentage', - 'related_revenues': 'related_revenues_percentage' - }) + data = data.rename( + index={ + "unrelated_revenues": "unrelated_revenues_percentage", + "related_revenues": "related_revenues_percentage", + } + ) # Convert DataFrame to dictionary data = data.to_dict() @@ -937,7 +951,9 @@ def compute_related_and_unrelated_revenues_breakdown( return data -def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: str, year: int) -> tuple[pd.DataFrame, go.Figure]: +def display_related_and_unrelated_revenues_breakdown( + df: pd.DataFrame, company: str, year: int +) -> tuple[pd.DataFrame, go.Figure]: """Display related and unrelated revenues in tax heaven, non tax heaven and domestic jurisdictions. @@ -951,47 +967,37 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: data = compute_related_and_unrelated_revenues_breakdown(df=df, company=company, year=year) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Rename columns and indexes - df.columns = df.columns.str.replace('_', ' ').str.capitalize() - df.index = df.index.str.replace('_', ' ').str.capitalize() + df.columns = df.columns.str.replace("_", " ").str.capitalize() + df.index = df.index.str.replace("_", " ").str.capitalize() # Create figure fig = px.bar( df, - x=['Unrelated revenues percentage', 'Related revenues percentage'], + x=["Unrelated revenues percentage", "Related revenues percentage"], y=df.index, - orientation='h', - text_auto='.0%' + orientation="h", + text_auto=".0%", ) # Update layout settings fig.update_layout( - title='Breakdown of revenue', - xaxis=dict( - title=None, - tickformat='.0%' - ), + title="Breakdown of revenue", + xaxis=dict(title=None, tickformat=".0%"), yaxis_title=None, - legend=dict( - title=dict(text=''), - orientation='h' - ), - plot_bgcolor='white', + legend=dict(title=dict(text=""), orientation="h"), + plot_bgcolor="white", width=800, - height=480 + height=480, ) # Define position of text values - for col in ['Unrelated revenues percentage', 'Related revenues percentage']: - values_positions = ['outside' if value <= 0.05 else 'inside' for value in df[col]] + for col in ["Unrelated revenues percentage", "Related revenues percentage"]: + values_positions = ["outside" if value <= 0.05 else "inside" for value in df[col]] - fig.update_traces( - textangle=0, - textposition=values_positions, - selector=dict(name=col) - ) + fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name=col)) # Add annotation if no values are availables (no bar displayed) for i, index in enumerate(df.index): @@ -999,13 +1005,13 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: fig.add_annotation( x=0.5, y=df.index[i], - text='No information to display', + text="No information to display", showarrow=False, - font=dict(size=13) + font=dict(size=13), ) # fig.show() - return pd.DataFrame.from_dict(data, orient='index'), go.Figure(fig) + return pd.DataFrame.from_dict(data, orient="index"), go.Figure(fig) # Viz 21 - evolution of tax havens use over time : % profit vs % employees in TH over time @@ -1021,33 +1027,39 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: """ # Filter rows with selected company and subset with necessary features - features = ['jur_code', 'year', 'jur_tax_haven', 'profit_before_tax', 'employees'] - df = df.loc[(df['mnc'] == company), features] + features = ["jur_code", "year", "jur_tax_haven", "profit_before_tax", "employees"] + df = df.loc[(df["mnc"] == company), features] # Keep jurisdictions with profitable or missing revenues - df = df.loc[(df['profit_before_tax'] >= 0) | (df['profit_before_tax'].isna())] + df = df.loc[(df["profit_before_tax"] >= 0) | (df["profit_before_tax"].isna())] # For all sum calculations below : # - Result NA : all jurisdictions values were NA ; # - Result 0 : at least one jurisdiction was reported as 0. # Calculate total profit and employees by year and tax haven status - df = df.groupby(['year', 'jur_tax_haven'], as_index=False)[['profit_before_tax', 'employees']].sum(min_count=1) + df = df.groupby(["year", "jur_tax_haven"], as_index=False)[ + ["profit_before_tax", "employees"] + ].sum(min_count=1) # Calculate total profits and employees for each year - for year in df['year'].unique(): - df.loc[df['year'] == year, 'total_profit'] = df.loc[df['year'] == year, 'profit_before_tax'].sum(min_count=1) - df.loc[df['year'] == year, 'total_employees'] = df.loc[df['year'] == year, 'employees'].sum(min_count=1) + for year in df["year"].unique(): + df.loc[df["year"] == year, "total_profit"] = df.loc[ + df["year"] == year, "profit_before_tax" + ].sum(min_count=1) + df.loc[df["year"] == year, "total_employees"] = df.loc[ + df["year"] == year, "employees" + ].sum(min_count=1) # Remove non tax haven jurisdictions - df = df.loc[df['jur_tax_haven'] == True].reset_index() + df = df.loc[df["jur_tax_haven"] == True].reset_index() # Calculate percentages - df['tax_havens_profit_%'] = df['profit_before_tax'] / df['total_profit'] - df['tax_havens_employees_%'] = df['employees'] / df['total_employees'] + df["tax_havens_profit_%"] = df["profit_before_tax"] / df["total_profit"] + df["tax_havens_employees_%"] = df["employees"] / df["total_employees"] # Convert necessary data to dictionnary - data = df[['year', 'tax_havens_profit_%', 'tax_havens_employees_%']].to_dict() + data = df[["year", "tax_havens_profit_%", "tax_havens_employees_%"]].to_dict() return data @@ -1067,33 +1079,32 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): df = pd.DataFrame.from_dict(data) # Rename columns - df = df.rename(columns={ - 'tax_havens_profit_%': 'Percentage of profits in tax havens', - 'tax_havens_employees_%': 'Percentage of employees in tax havens' - }) + df = df.rename( + columns={ + "tax_havens_profit_%": "Percentage of profits in tax havens", + "tax_havens_employees_%": "Percentage of employees in tax havens", + } + ) # Create figure fig = px.bar( df, - x='year', - y=['Percentage of profits in tax havens', 'Percentage of employees in tax havens'], - barmode='group', - text_auto='.1%' + x="year", + y=["Percentage of profits in tax havens", "Percentage of employees in tax havens"], + barmode="group", + text_auto=".1%", ) # Update layout settings fig.update_layout( - title='Tax havens use in profitables jurisdictions', + title="Tax havens use in profitables jurisdictions", xaxis_title=None, yaxis_title=None, - yaxis_tickformat='.0%', - legend=dict( - title=dict(text=''), - orientation='h' - ), - plot_bgcolor='white', + yaxis_tickformat=".0%", + legend=dict(title=dict(text=""), orientation="h"), + plot_bgcolor="white", width=800, - height=480 + height=480, ) # fig.show() @@ -1103,13 +1114,15 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): # Viz 24 def compute_number_of_tracked_mnc_available(df) -> dict: # Drop duplicates to ensure each MNC appears only once per year - df_unique_mnc = df.drop_duplicates(subset=['year', 'mnc']) + df_unique_mnc = df.drop_duplicates(subset=["year", "mnc"]) # Group the DataFrame by 'mnc' and count the number of reports for each MNC - df_reports_per_mnc = df_unique_mnc.groupby('mnc').size().reset_index(name='report_count') + df_reports_per_mnc = df_unique_mnc.groupby("mnc").size().reset_index(name="report_count") # Convert the DataFrame to a dictionary where MNCs are keys and report counts are values - mnc_report_count = dict(zip(df_reports_per_mnc['mnc'], df_reports_per_mnc['report_count'], strict=False)) + mnc_report_count = dict( + zip(df_reports_per_mnc["mnc"], df_reports_per_mnc["report_count"], strict=False) + ) return mnc_report_count @@ -1121,17 +1134,14 @@ def display_number_of_tracked_mnc_available(df) -> go.Figure: # Generate the word cloud using the report counts as weights wordcloud = WordCloud( - width=1200, - height=800, - background_color='white', - color_func=color_func + width=1200, height=800, background_color="white", color_func=color_func ).generate_from_frequencies(mnc_report_count) # Display the word cloud fig = px.imshow(wordcloud) # Remove hover on image - fig.update_traces(hoverinfo='skip', hovertemplate='') + fig.update_traces(hoverinfo="skip", hovertemplate="") # Remove colorbar fig.update_layout(coloraxis_showscale=False) @@ -1150,16 +1160,16 @@ def display_number_of_tracked_mnc_available(df) -> go.Figure: # List financial columns financial_columns = [ - 'total_revenues', - 'profit_before_tax', - 'tax_paid', - 'tax_accrued', - 'unrelated_revenues', - 'related_revenues', - 'stated_capital', - 'accumulated_earnings', - 'tangible_assets', - 'employees' + "total_revenues", + "profit_before_tax", + "tax_paid", + "tax_accrued", + "unrelated_revenues", + "related_revenues", + "stated_capital", + "accumulated_earnings", + "tangible_assets", + "employees", ] @@ -1177,12 +1187,12 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1197,8 +1207,8 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float # Calculate percentage of each financial value where jurisdiction is 'OTHER' # Percentage = 1. Total of 'OTHER' row(s) / 2. Total of all rows other_percentage = ( - df.loc[df['jur_code'] == 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df["jur_code"] == "OTHER", financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate geographic score @@ -1222,12 +1232,12 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1243,7 +1253,7 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo score = len(financial_columns_left) - for variable in ['profit_before_tax', 'tax_paid']: + for variable in ["profit_before_tax", "tax_paid"]: if variable in df.columns: score += 1 @@ -1267,12 +1277,12 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo # Filter rows with selected company and subset with financial columns df = df.loc[ - (df['mnc'] == company) & (df['year'] == year), - ['mnc', 'year', 'upe_code', 'jur_code', 'jur_name', *financial_columns] + (df["mnc"] == company) & (df["year"] == year), + ["mnc", "year", "upe_code", "jur_code", "jur_name", *financial_columns], ] # Remove columns where data are missing for all jurisdictions - df = df.dropna(axis='columns', how='all') + df = df.dropna(axis="columns", how="all") # List financial columns left after deleting columns with only missing values financial_columns_left = [col for col in df.columns if col in financial_columns] @@ -1287,8 +1297,8 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo # Calculate percentage of each financial value where jurisdiction is not 'OTHER' # Percentage = 1. Total of not 'OTHER' row(s) / 2. Total of all rows not_other_percentage = ( - df.loc[df['jur_code'] != 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df["jur_code"] != "OTHER", financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate transparency score @@ -1310,7 +1320,7 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict: """ # List all years when the company as reported - years_list = sorted(df.loc[df['mnc'] == company, 'year'].unique()) + years_list = sorted(df.loc[df["mnc"] == company, "year"].unique()) # Initialize an empty dictionary data = dict() @@ -1323,17 +1333,16 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict: transparency_score = compute_transparency_score(df=df, company=company, year=year) data[year] = { - 'mnc': company, - 'geographic_score': geographic_score, - 'completeness_score': completeness_score, - 'transparency_score': transparency_score + "mnc": company, + "geographic_score": geographic_score, + "completeness_score": completeness_score, + "transparency_score": transparency_score, } return data -def transparency_scores_to_csv( - df: pd.DataFrame, csv_path: str = './') -> pd.DataFrame: +def transparency_scores_to_csv(df: pd.DataFrame, csv_path: str = "./") -> pd.DataFrame: """Compute transparency score for all companies and all years into a DataFrame and export it to a csv file (optional). @@ -1346,24 +1355,23 @@ def transparency_scores_to_csv( """ # List all companies - mnc_list = df['mnc'].unique() + mnc_list = df["mnc"].unique() # Initialize an empty DataFrame mnc_df = pd.DataFrame() # Calculate transparency scores for all companies and add them to the DataFrame for mnc in mnc_list: - temp_df = pd.DataFrame.from_dict( - compute_all_scores(df=df, company=mnc), orient='index') + temp_df = pd.DataFrame.from_dict(compute_all_scores(df=df, company=mnc), orient="index") mnc_df = pd.concat([mnc_df, temp_df]) # Reset index and move 'mnc' columns in first position - mnc_df = mnc_df.reset_index().rename(columns={'index': 'year'}) - mnc_df.insert(0, 'mnc', mnc_df.pop('mnc')) + mnc_df = mnc_df.reset_index().rename(columns={"index": "year"}) + mnc_df.insert(0, "mnc", mnc_df.pop("mnc")) if csv_path: - mnc_df.to_csv(csv_path + 'transparency_scores.csv', index=False) + mnc_df.to_csv(csv_path + "transparency_scores.csv", index=False) return mnc_df @@ -1381,16 +1389,17 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None) data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Reset index and move 'mnc' columns in first position - df = df.reset_index().rename(columns={'index': 'year'}) + df = df.reset_index().rename(columns={"index": "year"}) # When data are not filtered by year, the score is the average of all years score = round( - df.loc[df['year'] == year, 'transparency_score'].iloc[0] if year - else df['transparency_score'].mean(), - 0 + df.loc[df["year"] == year, "transparency_score"].iloc[0] + if year + else df["transparency_score"].mean(), + 0, ) # Create figure @@ -1398,25 +1407,21 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None) # Add circular background fig.add_shape( - type='circle', - x0=0, y0=0, x1=1, y1=1, - line_color='blue', - fillcolor='blue', - opacity=0.3 + type="circle", x0=0, y0=0, x1=1, y1=1, line_color="blue", fillcolor="blue", opacity=0.3 ) # Add indicator - fig.add_trace(go.Indicator( - mode='number', - value=score, - number={'suffix': '%', 'valueformat': '.0f', 'font': {'size': 54}}, - domain={'x': [0, 1], 'y': [0, 1]} - )) + fig.add_trace( + go.Indicator( + mode="number", + value=score, + number={"suffix": "%", "valueformat": ".0f", "font": {"size": 54}}, + domain={"x": [0, 1], "y": [0, 1]}, + ) + ) # Update layout - fig.update_layout( - width=360, - height=360) + fig.update_layout(width=360, height=360) return score @@ -1425,6 +1430,7 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None) # Functions below use the same computation function (compute_all_scores) as used for Viz 25. + def display_transparency_score_over_time(df: pd.DataFrame, company: str): """Display transparency scores over time for a specific company in a bar chart. @@ -1438,48 +1444,42 @@ def display_transparency_score_over_time(df: pd.DataFrame, company: str): data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Reset index and move 'mnc' columns in first position - df = df.reset_index().rename(columns={'index': 'year'}) + df = df.reset_index().rename(columns={"index": "year"}) # Create figure - fig = px.bar(df, x='year', y='transparency_score', - text_auto='.0f' - ) + fig = px.bar(df, x="year", y="transparency_score", text_auto=".0f") # Update layout settings fig.update_layout( - title='Transparency score over time', - xaxis=dict( - title=None, - tickvals=df['year'].unique() - ), + title="Transparency score over time", + xaxis=dict(title=None, tickvals=df["year"].unique()), yaxis=dict( title=None, showline=True, - ticks='outside', - linecolor='grey', - tickcolor='grey', + ticks="outside", + linecolor="grey", + tickcolor="grey", range=[0, 101], tickvals=[0, 25, 50, 75, 100], - ticktext=[0, '', '', '', 100] + ticktext=[0, "", "", "", 100], ), - plot_bgcolor='white', + plot_bgcolor="white", width=800, - height=480 + height=480, ) # Force position and color of bar values - fig.update_traces( - textposition='outside', textfont=dict(color=fig.data[0].marker.color) - ) + fig.update_traces(textposition="outside", textfont=dict(color=fig.data[0].marker.color)) fig.show() def display_transparency_score_over_time_details( - df: pd.DataFrame, company: str) -> pd.DataFrame: + df: pd.DataFrame, company: str +) -> pd.DataFrame: """Display details of components of transparency scores over time for a specific company in a table. @@ -1495,25 +1495,27 @@ def display_transparency_score_over_time_details( data = compute_all_scores(df=df, company=company) # Create DataFrame - df = pd.DataFrame.from_dict(data, orient='index') + df = pd.DataFrame.from_dict(data, orient="index") # Drop 'mnc' column - df = df.drop(columns='mnc') + df = df.drop(columns="mnc") # Round and convert percentage to string with '/100' annotation - df = df.apply(lambda x: round(x).astype(int).astype('string') + '/100') + df = df.apply(lambda x: round(x).astype(int).astype("string") + "/100") # Reset index and rename 'year' column - df = df.reset_index().rename(columns={'index': 'Fiscal year'}) + df = df.reset_index().rename(columns={"index": "Fiscal year"}) # Move 'transparency_score' before other score columns - df.insert(1, 'transparency_score', df.pop('transparency_score')) + df.insert(1, "transparency_score", df.pop("transparency_score")) # Rename columns - df = df.rename(columns={ - 'geographic_score': 'Score on geographical disaggretion', - 'completeness_score': 'Score on variable exhaustiveness', - 'transparency_score': 'Transparency score', - }) + df = df.rename( + columns={ + "geographic_score": "Score on geographical disaggretion", + "completeness_score": "Score on variable exhaustiveness", + "transparency_score": "Transparency score", + } + ) return df From 17032f315393c00e9f32aadd50688527db9d91ce Mon Sep 17 00:00:00 2001 From: anquetos Date: Mon, 17 Jun 2024 14:21:19 +0200 Subject: [PATCH 2/6] Add module description. --- app/algo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/algo.py b/app/algo.py index 5ca4795..0e2d02c 100644 --- a/app/algo.py +++ b/app/algo.py @@ -1,3 +1,8 @@ +""" +This module contains functions to compute and/or display the visualizations, defined by EU Tax Observatory, which +are needed in Taxplorer tool. Below functions will be used in different pages of the website. +""" + import pandas as pd import numpy as np import plotly.express as px From 687ef80e2dd200ff54273973310082cd1351f234 Mon Sep 17 00:00:00 2001 From: anquetos Date: Tue, 18 Jun 2024 17:03:53 +0200 Subject: [PATCH 3/6] Started cleaning algo : removed unused code, sorted functions and merged 'compute' and 'display' for home and company pages plots. --- app/algo.py | 593 +++++++++++++++-------------------- app/pages/company/company.md | 8 +- app/pages/company/company.py | 26 +- app/pages/home/home.md | 6 +- app/pages/home/home.py | 10 +- 5 files changed, 272 insertions(+), 371 deletions(-) diff --git a/app/algo.py b/app/algo.py index 0e2d02c..dc09b09 100644 --- a/app/algo.py +++ b/app/algo.py @@ -14,53 +14,78 @@ COLOR_SEQUENCE = ["#D9D9D9", "#1E2E5C"] -# TODO add viz comment -# Viz 1 - -def number_of_tracked_reports(df): - number_of_tracked_reports = len(df.groupby(["year", "mnc"])["mnc"]) - return number_of_tracked_reports +# Viz 1 : Number of tracked reports +def number_of_tracked_reports( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> int: + """Calculate the number of tracked reports with possibility to filter on company name, sector or headquarter + location. + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. -# TODO add viz comment -def number_of_tracked_reports_company(df_selected_company): - number_of_tracked_reports_company = len(df_selected_company.groupby(["year"])["year"]) - return number_of_tracked_reports_company + Returns: + int: number of tracked reports. + """ + # Initialise available filters + filter_values = [None, "mnc", "sector", "upe_name"] -def number_of_tracked_mnc(df: pd.DataFrame) -> int: - return df["mnc"].nunique() + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") + # Compute number of reports + if filter_name: + n_reports = ( + df.loc[df[filter_name] == filter_value].groupby("mnc")["year"].nunique().sum() + ) + else: + n_reports = df.groupby("mnc")["year"].nunique().sum() -# TODO add viz comment -def number_of_tracked_reports_sector(df_selected_sector): - number_of_tracked_reports_sector = len(df_selected_sector.groupby(["year", "mnc"])["year"]) - return number_of_tracked_reports_sector + return int(n_reports) -# TODO add viz comment -def number_of_tracked_reports_country(df_selected_country): - number_of_tracked_reports_country = len( - df_selected_country.groupby(["year", "mnc"])["year"] - ) - return number_of_tracked_reports_country +# Viz 2 : Number of tracked reports over time +def number_of_tracked_reports_over_time( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> go.Figure: + """Compute and plot the number of tracked reports over time with possibility to filter on company name, sector or + headquarter location. + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "mnc", "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. -# TODO add viz comment -# Viz 2 - Number of tracked reports over time -def number_of_tracked_reports_over_time(df): - df_count = df.groupby(["year"])["mnc"].nunique().reset_index() - return df_count + Returns: + go.Figure: number of tracked reports over time in a Plotly figure. + """ + # Initialise available filters + filter_values = [None, "mnc", "sector", "upe_name"] -def display_number_of_tracked_reports_over_time(df): - # Calculate number of companies per year - data = number_of_tracked_reports_over_time(df=df) + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") - # Bar color sequence - bar_color = "#D9D9D9" + # Compute number of reports + if filter_name: + data = ( + df.loc[df[filter_name] == filter_value] + .groupby("year")["mnc"] + .nunique() + .reset_index() + ) + else: + data = df.groupby("year")["mnc"].nunique().reset_index() # Create figure - fig = px.bar(data, x="year", y="mnc", color_discrete_sequence=[bar_color], text_auto=True) + fig = px.bar( + data, x="year", y="mnc", color_discrete_sequence=COLOR_SEQUENCE, text_auto=True + ) # Force position and color of bar values fig.update_traces(textposition="outside", textfont=dict(color="black")) @@ -88,155 +113,39 @@ def display_number_of_tracked_reports_over_time(df): return go.Figure(fig) -# TODO add viz comment -def number_of_tracked_reports_over_time_company(df_selected_company): - df_count_company = df_selected_company.groupby(["year"])["mnc"].nunique().reset_index() - # df_count_all_company = df.groupby(["year"])["mnc"].nunique().reset_index() - - # row[3].line_chart(df_count_all_company, x="year", y="mnc") - - # row[4].write("selected sector") - # row[4].write( - # "df_selected_sector.groupby(['year'])['mnc'].nunique().reset_index()" - # ) - return df_count_company - - -# TODO add viz comment -def number_of_tracked_reports_over_time_sector(df_selected_sector): - df_count_sector = df_selected_sector.groupby(["year"])["mnc"].nunique().reset_index() - - # df_count_all_sector = ( - # df.groupby(["year", "sector"])["mnc"].nunique().reset_index() - # ) - - # row[4].line_chart(df_count_all_sector, x="year", y="mnc", color="sector") - - # row[5].write("selected country") - # row[5].write( - # "df_selected_country.groupby(['year'])['mnc'].nunique().reset_index()" - # ) - return df_count_sector - - -# TODO add viz comment -def number_of_tracked_reports_over_time_country(df_selected_country): - df_count_country = df_selected_country.groupby(["year"])["mnc"].nunique().reset_index() - # df_count_all_country = ( - # df.groupby(["year", "jur_name"])["mnc"].nunique().reset_index() - # ) - - # row[5].line_chart(df_count_all_country, x="year", y="mnc", color="jur_name") - return df_count_country - - -# Viz 16 - - -# company’s % pre-tax profit and profit per employee -# plot chart : x-axis = % profit, y axis = profit / employee -# size of the bubble based on % profit and a color code for -# tax havens vs others -def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_company): - # pretax_profit_col_name = 'profit_before_tax' - profit_col_name = "" - employee_col_name = "employees" - df_selected_company[profit_col_name] / df_selected_company[employee_col_name] - - -# Viz 19 -# what are the tax havens being used by the company -# to test but could be a table with one row per jurisdiction (filtering on TH) with -# % profit -# % employee -# profit per employee -# % related party revenue -# for domestic vs tax havens vs. non havens -def tax_haven_used_by_company(df_selected_company): - company_upe_code = df_selected_company["upe_code"].unique()[0] - pc_list = ["employees", "profit_before_tax", "related_revenues"] - # grouper = df_selected_company.groupby('jur_name') - - df = pd.DataFrame(df_selected_company) - - df_domestic_company = df[df["jur_code"] == company_upe_code] - df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] - df_selected_company_nth = df[df["jur_tax_haven"] == "not.TH"] +# Viz 3 : Number of tracked mnc +def number_of_tracked_mnc( + df: pd.DataFrame, filter_name: str = None, filter_value: str = None +) -> int: + """Calculate the number of tracked reports with possibility to filter on company name, sector or headquarter + location. - for col in pc_list: - df.insert( - len(df_selected_company.columns), - col + "_domestic_sum", - df_domestic_company[col].sum(), - ) - - df.insert( - len(df_selected_company.columns), col + "_th_sum", df_selected_company_th[col].sum() - ) - - df.insert(len(df.columns), col + "_nth_sum", df_selected_company_nth[col].sum()) - - df.insert(len(df.columns), col + "_sum", df_selected_company[col].sum()) - - df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) - # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum'] - - df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] - df_selected_company_th_agg = df_selected_company_th.groupby(["mnc", "jur_name"]).agg( - profit_before_tax=("profit_before_tax", "sum"), - profit_before_tax_pc=("profit_before_tax_pc", "sum"), - employees_pc=("employees_pc", "sum"), - employees=("employees", "sum"), - related_revenues_pc=("related_revenues_pc", "sum"), - ) - df_selected_company_th_agg = df_selected_company_th_agg.reset_index() - df_selected_company_th_agg["profit per employee"] = ( - df_selected_company_th_agg["profit_before_tax"] - / df_selected_company_th_agg["employees"] - ) - df_selected_company_th_agg["profit per employee"] = df_selected_company_th_agg[ - "profit per employee" - ].replace([np.inf, -np.inf], None) + Args: + df (pd.DataFrame): CbCRs database. + filter_name (str, optional): Filter to apply, could be "sector" or "upe_name". Defaults to None. + filter_value (str, optional): Value to filter with. Defaults to None. - return df_selected_company, df_selected_company_th_agg + Returns: + int: number of companies in the database. + """ + # Initialise available filters + filter_values = [None, "sector", "upe_name"] -# TODO add viz comment -# complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) -def company_table(df_selected_company): - # company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = [ - "employees", - "profit_before_tax", - "unrelated_revenues", - "related_revenues", - "total_revenues", - "tax_paid", - ] + # Raise an error if "filter_value" not in list + if filter_name not in filter_values: + raise ValueError(f"Filter '{filter_name}' is not a valid filter.") - df = pd.DataFrame(df_selected_company) - for col in pc_list: - if col + "_sum" not in df.columns: - df.insert(len(df.columns), col + "_sum", df[col].sum()) - - df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) - # f_selected_company[col + '_sum'] = df_selected_company[col].sum() - # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum'] + # Compute number of reports + if filter_name: + n_company = df.loc[df[filter_name] == filter_value, "mnc"].nunique() + else: + n_company = df["mnc"].nunique() - # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) - df_selected_company_by_jur = df.groupby(["mnc", "jur_name"]).agg( - related_revenues_pc=("related_revenues_pc", "sum"), - unrelated_revenues=("unrelated_revenues", "sum"), - total_revenues=("total_revenues", "sum"), - profit_before_tax=("profit_before_tax", "sum"), - employees_pc=("employees_pc", "sum"), - tax_paid=("tax_paid", "sum"), - tax_paid_pc=("tax_paid_pc", "sum"), - ) - return df_selected_company_by_jur.reset_index() + return int(n_company) -# Viz 4 - Breakdown of reports by sector (pie chart) +# Viz 4 : Breakdown of reports by sector def breakdown_of_reports_by_sector(df): # Dataframe called df df_reports_per_sector_year = ( @@ -290,7 +199,7 @@ def breakdown_of_reports_by_sector_viz(df_reports_per_sector): return go.Figure(fig) -# Viz 5 - Breakdown of reports by HQ country (pie chart) +# Viz 5 : Breakdown of reports by hq country def breakdown_of_reports_by_hq_country(df): # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ country and year df_reports_per_country_year = ( @@ -349,9 +258,7 @@ def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): return go.Figure(fig) -## Viz 6 - Breakdown of reports by sector over time (bar chart) - - +# Viz 6 : Breakdown of reports by sector over time def breakdown_of_reports_by_sector_over_time(df): # df_reports_per_sector_over_time = df # return df_reports_per_sector_over_time @@ -409,23 +316,27 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top return go.Figure(fig) -## Viz 7 - Breakdown of reports by HQ country over time (bar chart) +# Viz 7 : Breakdown of reports by hq country over time # TODO add code -## Viz 8 - Breakdown of MNC by sector (pie chart - changed to bar chart for more visibility) + +# Viz 8 : Breakdown of MNC by sector # TODO add code -## Viz 9 - Breakdown of MNC by HQ country (pie chart - changed to bar chart for more visibility) + +# Viz 9 : Breakdown of MNC by HQ country # TODO add code -## Viz 10/11 - Breakdown of MNC by sector + +# Viz 10/11 : Breakdown of MNC by sector # TODO add code -## Viz 11 - Breakdown of MNC by HQ country + +# Viz 11 : Breakdown of MNC by HQ country # TODO add code -# Viz 12 - available reports by company +# Viz 12 : available reports by company def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: """Compute the number of reports tracked for a specific company and the available fiscal years. @@ -483,19 +394,19 @@ def display_company_available_reports( return df.style.hide(axis="columns") -# Viz 13 - company key financials kpis -def compute_company_key_financials_kpis( +# Viz 13 : Company key financials kpis +def company_key_financials_kpis( df: pd.DataFrame, company: str, year: int = None ) -> dict: - """Compute key financial KPIs for a company. + """Compute key financial KPIs for a company in a table. Args: df (pd.DataFrame): CbCRs database. - company (str): Company name + company (str): company name. year (int, optional): fiscal year to filter the results with. Defaults to None. Returns: - dict: company key financial KPIs. + pd.DataFrame: table with company key financial KPIs. """ kpis_list = [ @@ -523,8 +434,6 @@ def compute_company_key_financials_kpis( .sum() ) - # df = df.set_index('year') - # Make financial numbers easily readable with 'humanize' package for column in df.columns: if column not in ["employees", "upe_name"]: @@ -541,30 +450,8 @@ def compute_company_key_financials_kpis( # Clean columns string df.columns = df.columns.str.replace("_", " ").str.capitalize() - # Create a dictionary with the results - data = df.to_dict(orient="index") - - return data - - -def display_company_key_financials_kpis(df: pd.DataFrame, company: str, year: int = None): - """Display key financial KPIs for a company. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int, optional): fiscal year to filter the results with. Defaults to None. - - Returns: - pd.DataFrame: company key financial KPIs. - """ - - # Compute data - data = compute_company_key_financials_kpis(df=df, company=company, year=year) - - # Create the table - df = pd.DataFrame.from_dict(data) - df = df.reset_index() + # Transpose DataFrame + df = df.T.reset_index() # Rename columns df = df.rename(columns={"index": "Variable", 0: "Value"}) @@ -575,17 +462,17 @@ def display_company_key_financials_kpis(df: pd.DataFrame, company: str, year: in return df -# Viz 14 -def compute_top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> dict: - """Rank jurisdictions on their percentage of total revenues. +# Viz 14 : company top jurisdictions for revenue +def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.Figure: + """Compute and plot top jurisdictions on their percentage of total revenues. Args: df (pd.DataFrame): CbCRs database. company (str): Company name - year (int): fiscal year. + year (int): Fiscal year. Returns: - dict: Rank of jurisdictions by percentage of total revenues. + go.Figure: Jurisdictions by percentage of total revenues in a Plotly figure. """ df = df.loc[ @@ -611,39 +498,17 @@ def compute_top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) # Compute percentage of revenue df["total_revenues_%"] = df["total_revenues"] / df["total_revenues"].sum() - # Convert DataFrame to dictionnary - data = df.to_dict() - - return data - - -def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int): - """Display jurisdictions by percentage of total revenues in an - horizontal bar chart. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year. - """ - - # Compute data - data = compute_top_jurisdictions_revenue(df=df, company=company, year=year) - # Create DataFrame - df = pd.DataFrame.from_dict(data) + # Sort jurisdictions by percentage of total revenues df = df.sort_values(by="total_revenues_%") - # Bar color sequence - bar_color = "#D9D9D9" - # Create figure fig = px.bar( df, x="total_revenues_%", y="jur_name", orientation="h", - color_discrete_sequence=[bar_color], + color_discrete_sequence=COLOR_SEQUENCE, text_auto=".1%", ) @@ -676,21 +541,21 @@ def display_jurisdictions_top_revenue(df: pd.DataFrame, company: str, year: int) return go.Figure(fig) -# Viz 15 -def compute_pretax_profit_and_employees_rank( +# Viz 15 : company’s % pre-tax profit and % employees by jurisdiction +def pretax_profit_and_employees_rank( df: pd.DataFrame, company: str, year: int -) -> pd.DataFrame: - """Compute jurisdictions percentage of profit before tax and percentage - of employees and rank by percentage of profit. +) -> go.Figure: + """Compute and plot jurisdictions percentage of profit before tax and percentage of employees then rank by + percentage of profit. Args: df (pd.DataFrame): CbCRs database. company (str): Company name - year (int): fiscal year. + year (int): Fiscal year. Returns: - dict: rank of jurisdictions with percentage of profit before and percentage - of employees. + go.Figure:: rank of jurisdictions with percentage of profit before and percentage of employees in a Plotly + figure. """ # Filter rows with selected company/year and subset with necessary features @@ -708,35 +573,9 @@ def compute_pretax_profit_and_employees_rank( df["employees_%"] = df["employees"] / df["employees"].sum() df = df.drop(columns=["profit_before_tax", "employees"]) - # data = df.to_dict() - - return df - - -def display_pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int -) -> go.Figure: - """Display rank of jurisdictions by percentage of profit before and percentage - of employees. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year. - """ - - # Compute data - df = compute_pretax_profit_and_employees_rank(df=df, company=company, year=year) - - # Create DataFrame - # df = pd.DataFrame(data) - # Rename columns df = df.rename(columns={"profit_before_tax_%": "% profit", "employees_%": "% employees"}) - # Bar color sequence - bar_colors = ["#D9D9D9", "#1E2E5C"] - # Create figure fig = px.bar( df, @@ -745,7 +584,7 @@ def display_pretax_profit_and_employees_rank( barmode="group", orientation="h", text_auto=".1%", - color_discrete_sequence=bar_colors, + color_discrete_sequence=COLOR_SEQUENCE ) # Set figure height (min. 640) depending on the number of jurisdictions @@ -808,10 +647,21 @@ def display_pretax_profit_and_employees_rank( return go.Figure(fig) -# Viz 16 -def compute_pretax_profit_and_profit_per_employee( +# Viz 16 : company’s % pre-tax profit and profit per employee +def pretax_profit_and_profit_per_employee( df: pd.DataFrame, company: str, year: int -) -> pd.DataFrame: +) -> go.Figure: + """Compute and plot jurisdictions percentage of profit before tax and profit by employee. + + Args: + df (pd.DataFrame): CbCRs database. + company (str): Company name + year (int): Fiscal year. + + Returns: + go.Figure: Percentage of profit and profit/employee in a Plotly Figure. + """ + # Filter rows with selected company/year and subset with necessary features features = ["jur_name", "profit_before_tax", "employees", "jur_tax_haven"] df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] @@ -830,21 +680,6 @@ def compute_pretax_profit_and_profit_per_employee( df["profit_per_employee"] = df["profit_before_tax"] / df["employees"] df = df.drop(columns=["profit_before_tax", "employees"]) - # print('compute_pretax_profit_and_profit_per_employee df.head():\n', df.head()) - # data = df.to_dict() - - return df - - -def display_pretax_profit_and_profit_per_employee( - df: pd.DataFrame, company: str, year: int -) -> go.Figure: - # Compute data - df = compute_pretax_profit_and_profit_per_employee(df=df, company=company, year=year) - - # Create DataFrame - # df = pd.DataFrame(data) - # Replace bool values of Tax haven by string values df["jur_tax_haven"] = df["jur_tax_haven"].map({True: "Tax haven", False: "Non tax haven"}) @@ -887,9 +722,11 @@ def display_pretax_profit_and_profit_per_employee( return go.Figure(fig) -# Viz 18 +# Viz 17 : company’s % pre-tax profit and % employees in TH vs domestic vs non TH +# TODO add code +# Viz 18 : breakdown of revenue between related party and unrelated party in TH vs domestic vs non TH def compute_related_and_unrelated_revenues_breakdown( df: pd.DataFrame, company: str, year: int ) -> dict: @@ -1019,7 +856,92 @@ def display_related_and_unrelated_revenues_breakdown( return pd.DataFrame.from_dict(data, orient="index"), go.Figure(fig) -# Viz 21 - evolution of tax havens use over time : % profit vs % employees in TH over time +# Viz 19 : what are the tax havens being used by the company +def tax_haven_used_by_company(df_selected_company): + company_upe_code = df_selected_company["upe_code"].unique()[0] + pc_list = ["employees", "profit_before_tax", "related_revenues"] + # grouper = df_selected_company.groupby('jur_name') + + df = pd.DataFrame(df_selected_company) + + df_domestic_company = df[df["jur_code"] == company_upe_code] + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_nth = df[df["jur_tax_haven"] == "not.TH"] + + for col in pc_list: + df.insert( + len(df_selected_company.columns), + col + "_domestic_sum", + df_domestic_company[col].sum(), + ) + + df.insert( + len(df_selected_company.columns), col + "_th_sum", df_selected_company_th[col].sum() + ) + + df.insert(len(df.columns), col + "_nth_sum", df_selected_company_nth[col].sum()) + + df.insert(len(df.columns), col + "_sum", df_selected_company[col].sum()) + + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) + # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum'] + + df_selected_company_th = df[df["jur_tax_haven"] != "not.TH"] + df_selected_company_th_agg = df_selected_company_th.groupby(["mnc", "jur_name"]).agg( + profit_before_tax=("profit_before_tax", "sum"), + profit_before_tax_pc=("profit_before_tax_pc", "sum"), + employees_pc=("employees_pc", "sum"), + employees=("employees", "sum"), + related_revenues_pc=("related_revenues_pc", "sum"), + ) + df_selected_company_th_agg = df_selected_company_th_agg.reset_index() + df_selected_company_th_agg["profit per employee"] = ( + df_selected_company_th_agg["profit_before_tax"] + / df_selected_company_th_agg["employees"] + ) + df_selected_company_th_agg["profit per employee"] = df_selected_company_th_agg[ + "profit per employee" + ].replace([np.inf, -np.inf], None) + + return df_selected_company, df_selected_company_th_agg + + +# Viz 20 : complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for +# each (color code for tax havens) +def company_table(df_selected_company): + # company_upe_code = df_selected_company['upe_code'].unique()[0] + pc_list = [ + "employees", + "profit_before_tax", + "unrelated_revenues", + "related_revenues", + "total_revenues", + "tax_paid", + ] + + df = pd.DataFrame(df_selected_company) + for col in pc_list: + if col + "_sum" not in df.columns: + df.insert(len(df.columns), col + "_sum", df[col].sum()) + + df.insert(len(df.columns), col + "_pc", 100 * df[col] / df[col + "_sum"]) + # f_selected_company[col + '_sum'] = df_selected_company[col].sum() + # df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col + '_sum'] + + # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) + df_selected_company_by_jur = df.groupby(["mnc", "jur_name"]).agg( + related_revenues_pc=("related_revenues_pc", "sum"), + unrelated_revenues=("unrelated_revenues", "sum"), + total_revenues=("total_revenues", "sum"), + profit_before_tax=("profit_before_tax", "sum"), + employees_pc=("employees_pc", "sum"), + tax_paid=("tax_paid", "sum"), + tax_paid_pc=("tax_paid_pc", "sum"), + ) + return df_selected_company_by_jur.reset_index() + + +# Viz 21 : evolution of tax havens use over time : % profit vs % employees in TH over time def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: """Compute the evolution of tax havens use by company over time. @@ -1116,31 +1038,31 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): return go.Figure(fig) -# Viz 24 -def compute_number_of_tracked_mnc_available(df) -> dict: - # Drop duplicates to ensure each MNC appears only once per year - df_unique_mnc = df.drop_duplicates(subset=["year", "mnc"]) - - # Group the DataFrame by 'mnc' and count the number of reports for each MNC - df_reports_per_mnc = df_unique_mnc.groupby("mnc").size().reset_index(name="report_count") +# Viz 22 : locations of profits booked vs. mean 3Y ETR +# TODO add code - # Convert the DataFrame to a dictionary where MNCs are keys and report counts are values - mnc_report_count = dict( - zip(df_reports_per_mnc["mnc"], df_reports_per_mnc["report_count"], strict=False) - ) - return mnc_report_count +# Viz 24 : mnc tracked +def mnc_tracked(df: pd.DataFrame) -> go.Figure: + """"Compute and plot the list of company name in a word cloud where the size of the font depends of the number + of reports available. + Args: + df (pd.DataFrame): CbCRs database. -def display_number_of_tracked_mnc_available(df) -> go.Figure: - mnc_report_count = compute_number_of_tracked_mnc_available(df=df) + Returns: + go.Figure: word cloud with company name in a Plotly figure. + """ + + # Create dictionnary with company name as key and the number of reports as value + data = df.groupby("mnc")["year"].nunique().to_dict() color_func = get_single_color_func("#B8BEDB") # Generate the word cloud using the report counts as weights wordcloud = WordCloud( width=1200, height=800, background_color="white", color_func=color_func - ).generate_from_frequencies(mnc_report_count) + ).generate_from_frequencies(data) # Display the word cloud fig = px.imshow(wordcloud) @@ -1161,7 +1083,7 @@ def display_number_of_tracked_mnc_available(df) -> go.Figure: return go.Figure(fig) -# Viz 25 +# Viz 25 : company’s average transparency score # List financial columns financial_columns = [ @@ -1381,8 +1303,8 @@ def transparency_scores_to_csv(df: pd.DataFrame, csv_path: str = "./") -> pd.Dat return mnc_df -def display_transparency_score(df: pd.DataFrame, company: str, year: int = None): - """Display transparency score for specific company in a metric. +def transparency_score(df: pd.DataFrame, company: str, year: int = None): + """Compute transparency score for specific company in a metric. Args: df (pd.DataFrame): CbCRs database. @@ -1407,36 +1329,12 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None) 0, ) - # Create figure - fig = go.Figure() - - # Add circular background - fig.add_shape( - type="circle", x0=0, y0=0, x1=1, y1=1, line_color="blue", fillcolor="blue", opacity=0.3 - ) - - # Add indicator - fig.add_trace( - go.Indicator( - mode="number", - value=score, - number={"suffix": "%", "valueformat": ".0f", "font": {"size": 54}}, - domain={"x": [0, 1], "y": [0, 1]}, - ) - ) - - # Update layout - fig.update_layout(width=360, height=360) - return score -# Viz 26 - +# Viz 26 : company’s transparency score over time + details for each component of the score # Functions below use the same computation function (compute_all_scores) as used for Viz 25. - - -def display_transparency_score_over_time(df: pd.DataFrame, company: str): +def transparency_score_over_time(df: pd.DataFrame, company: str): """Display transparency scores over time for a specific company in a bar chart. @@ -1482,18 +1380,17 @@ def display_transparency_score_over_time(df: pd.DataFrame, company: str): fig.show() -def display_transparency_score_over_time_details( +def transparency_scores_over_time_details( df: pd.DataFrame, company: str ) -> pd.DataFrame: - """Display details of components of transparency scores over time - for a specific company in a table. + """Compute all geographic, completeness and general transparency scores over time for a specific company in a table. Args: df (pd.DataFrame): CbCRs database. company (str): Company name. Returns: - pd.DataFrame: Table with details of components over years. + pd.DataFrame: Table with details of scores over years. """ # Compute data @@ -1524,3 +1421,7 @@ def display_transparency_score_over_time_details( ) return df + + +# Viz 27 : average transparency score over time +# TODO add code diff --git a/app/pages/company/company.md b/app/pages/company/company.md index bc34ce9..a6823f7 100644 --- a/app/pages/company/company.md +++ b/app/pages/company/company.md @@ -129,15 +129,15 @@ Financial profile <|part|class_name=viz-container| <|{viz["fin_key_financials_kpis"].title}|text|class_name=text-weight400|>
-<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300 text-transparent|> +<|{viz["fin_key_financials_kpis"].sub_title}|text|class_name=text-small text-weight300|> <|{viz["fin_key_financials_kpis"].data}|table|show_all|sortable=False|dynamic=True|style=table-cell|class_name=rows-similar table-top|> |> <|part|class_name=viz-container| -<|{viz["fin_jurisdictions_top_revenue"].title}|text|class_name=text-weight400|> +<|{viz["fin_top_jurisdictions_revenue"].title}|text|class_name=text-weight400|>
-<|{viz["fin_jurisdictions_top_revenue"].sub_title}|text|class_name=text-small text-weight300|> -<|chart|figure={viz["fin_jurisdictions_top_revenue"].fig}|> +<|{viz["fin_top_jurisdictions_revenue"].sub_title}|text|class_name=text-small text-weight300|> +<|chart|figure={viz["fin_top_jurisdictions_revenue"].fig}|> |> |> diff --git a/app/pages/company/company.py b/app/pages/company/company.py index 6871428..0ce5b11 100644 --- a/app/pages/company/company.py +++ b/app/pages/company/company.py @@ -35,7 +35,7 @@ "fin_transparency_score", "fin_transparency_score_over_time_details", "fin_key_financials_kpis", - "fin_jurisdictions_top_revenue", + "fin_top_jurisdictions_revenue", "fin_pretax_profit_and_employees_rank", "fin_pretax_profit_and_profit_per_employee", } @@ -92,8 +92,8 @@ def update_state(state: State): # print(f'company state selected_year:{state.selected_year}') # Calculate number of reports for all companies - state.df_count_company = algo.number_of_tracked_reports_over_time_company( - state.df_selected_company + state.df_count_company = algo.number_of_tracked_reports( + state.df_selected_company, "mnc", state.selected_company ) # print(f'company state df_count_company:{state.df_count_company.head()}') @@ -128,7 +128,9 @@ def update_viz_company(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.number_of_tracked_reports_company(state.df_selected_company), + data=algo.number_of_tracked_reports( + state.df_selected_company, "mnc", state.selected_company + ), title="Number of reports", ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') @@ -137,7 +139,7 @@ def update_viz_company(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score(state.data, state.selected_company), + data=algo.transparency_score(state.data, state.selected_company), title="Transparency Score", sub_title="average over all reports", ).to_state() @@ -163,7 +165,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score( + data=algo.transparency_score( state.data, state.selected_company, int(state.selected_year) ), title="Transparency Score", @@ -175,7 +177,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_transparency_score_over_time_details( + data=algo.transparency_scores_over_time_details( state.data, state.selected_company ), title="Transparency score over time ", @@ -187,7 +189,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.display_company_key_financials_kpis( + data=algo.company_key_financials_kpis( state.data, state.selected_company, int(state.selected_year) ), title="Key metrics", @@ -195,11 +197,11 @@ def update_viz_year(state: State): ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - id = "fin_jurisdictions_top_revenue" + id = "fin_top_jurisdictions_revenue" state.viz[id] = Viz( id=id, state=state, - fig=algo.display_jurisdictions_top_revenue( + fig=algo.top_jurisdictions_revenue( state.data, state.selected_company, int(state.selected_year) ), title="Distribution of revenues across countries", @@ -212,7 +214,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - fig=algo.display_pretax_profit_and_employees_rank( + fig=algo.pretax_profit_and_employees_rank( state.data, state.selected_company, int(state.selected_year) ), title="% profit and employees by country", @@ -224,7 +226,7 @@ def update_viz_year(state: State): state.viz[id] = Viz( id=id, state=state, - fig=algo.display_pretax_profit_and_profit_per_employee( + fig=algo.pretax_profit_and_profit_per_employee( state.data, state.selected_company, int(state.selected_year) ), title="% profit and profit / employee by country", diff --git a/app/pages/home/home.md b/app/pages/home/home.md index 0177c81..8ac7581 100644 --- a/app/pages/home/home.md +++ b/app/pages/home/home.md @@ -108,10 +108,10 @@ Our database is growing |> <|part|class_name=viz-container| -<|{viz["general_number_of_tracked_mnc_available"].title}|text|class_name=text-weight400|> +<|{viz["general_list_of_tracked_mnc_available"].title}|text|class_name=text-weight400|>
-<|{viz["general_number_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|> -<|chart|figure={viz["general_number_of_tracked_mnc_available"].fig}|> +<|{viz["general_list_of_tracked_mnc_available"].sub_title}|text|class_name=text-small text-weight300|> +<|chart|figure={viz["general_list_of_tracked_mnc_available"].fig}|> |> |> diff --git a/app/pages/home/home.py b/app/pages/home/home.py index 3188cea..baeac96 100644 --- a/app/pages/home/home.py +++ b/app/pages/home/home.py @@ -29,7 +29,7 @@ def on_init(state: State): "general_number_of_tracked_reports", "general_number_of_tracked_reports_over_time", "general_number_of_tracked_mnc", - "general_number_of_tracked_mnc_available", + "general_list_of_tracked_mnc_available", ) ) @@ -50,8 +50,7 @@ def update_viz(state: State): state.viz[id] = Viz( id=id, state=state, - data=algo.number_of_tracked_reports_over_time(state.data), - fig=algo.display_number_of_tracked_reports_over_time(state.data), + fig=algo.number_of_tracked_reports_over_time(state.data), title="Number of reports over time", ).to_state() @@ -64,12 +63,11 @@ def update_viz(state: State): sub_title="with 1+ report tracked", ).to_state() - id = "general_number_of_tracked_mnc_available" + id = "general_list_of_tracked_mnc_available" state.viz[id] = Viz( id=id, state=state, - data=algo.compute_number_of_tracked_mnc_available(state.data), - fig=algo.display_number_of_tracked_mnc_available(state.data), + fig=algo.mnc_tracked(state.data), title="Multinationals available", sub_title="with 1+ report tracked", ).to_state() From 57158b9c823d219de4b063122bb0f6ceff230242 Mon Sep 17 00:00:00 2001 From: anquetos Date: Thu, 20 Jun 2024 11:57:41 +0200 Subject: [PATCH 4/6] Merged compute and display functions for all remaining visualizations. --- app/algo.py | 185 +++++++++++++--------------------------------------- 1 file changed, 44 insertions(+), 141 deletions(-) diff --git a/app/algo.py b/app/algo.py index dc09b09..4badf68 100644 --- a/app/algo.py +++ b/app/algo.py @@ -146,7 +146,7 @@ def number_of_tracked_mnc( # Viz 4 : Breakdown of reports by sector -def breakdown_of_reports_by_sector(df): +def breakdown_of_reports_by_sector(df: pd.DataFrame) -> go.Figure: # Dataframe called df df_reports_per_sector_year = ( df.groupby(["sector", "year"])["mnc"].nunique().reset_index(name="unique_company_count") @@ -170,10 +170,6 @@ def breakdown_of_reports_by_sector(df): by="unique_company_count", ascending=True ) - return df_reports_per_sector - - -def breakdown_of_reports_by_sector_viz(df_reports_per_sector): # Plotting the horizontal bar chart with Plotly Express fig = px.bar( df_reports_per_sector, @@ -195,13 +191,13 @@ def breakdown_of_reports_by_sector_viz(df_reports_per_sector): title_font_size=20, ) # Adjust font size - # Show the horizontal bar chart return go.Figure(fig) # Viz 5 : Breakdown of reports by hq country -def breakdown_of_reports_by_hq_country(df): - # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ country and year +def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure: + # Group the DataFrame by 'upe_name' (HQ country) and 'year' and count the number of unique companies for each HQ + # country and year df_reports_per_country_year = ( df.groupby(["upe_name", "year"])["mnc"] .nunique() @@ -228,10 +224,6 @@ def breakdown_of_reports_by_hq_country(df): by="unique_company_count", ascending=True ) - return df_reports_per_country - - -def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): # Plotting the horizontal bar chart with Plotly Express fig = px.bar( df_reports_per_country, @@ -253,16 +245,11 @@ def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): title_font_size=20, ) # Adjust font size - # Show the horizontal bar chart - # fig.show() return go.Figure(fig) # Viz 6 : Breakdown of reports by sector over time -def breakdown_of_reports_by_sector_over_time(df): - # df_reports_per_sector_over_time = df - # return df_reports_per_sector_over_time - +def breakdown_of_reports_by_sector_over_time(df: pd.DataFrame) -> go.Figure: # Step 1: Determine the top 10 sectors that released reports top_10_sectors = df["sector"].value_counts().nlargest(10).index.tolist() @@ -281,10 +268,6 @@ def breakdown_of_reports_by_sector_over_time(df): by="Sectors", ascending=False ) - return df_reports_per_year_sector, top_10_sectors - - -def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors): # Define the order of sectors for the stacked bar chart and legend, reversed chart_order = ["Others"] + top_10_sectors[::-1] legend_order = ["Others"] + top_10_sectors[::-1] @@ -311,8 +294,6 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top if trace.name == "Others": trace.marker.color = "grey" - # Show the plot - # fig.show() return go.Figure(fig) @@ -337,16 +318,18 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top # Viz 12 : available reports by company -def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: - """Compute the number of reports tracked for a specific company and the - available fiscal years. +def company_available_reports( + df: pd.DataFrame, company: str, hide_company: bool = True +) -> pd.DataFrame: + """Compute the number of reports tracked and the available fiscal years for a specific company. Args: df (pd.DataFrame): CbCRs database. - company (str): company name. + company (str): Company name. + hide_company (bool, optional): Hide company name in final table. Defaults to True. Returns: - dict: numbers of reports and fiscal years. + pd.DataFrame: numbers of reports and fiscal years in a table. """ available_years = df.loc[df["mnc"] == company, "year"].unique() n_reports = len(available_years) @@ -361,43 +344,20 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict: years_string = ", ".join(years_string_list[:-1]) years_string += " and " + years_string_list[-1] - # Create a dictionnary with the results - data = {"Company": company, "Reports": n_reports, "Fiscal year(s) available": years_string} - - return data - - -def display_company_available_reports( - df: pd.DataFrame, company: str, hide_company: bool = True -) -> pd.DataFrame: - """Display the number of reports tracked for a specific company and the - available fiscal years. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): company name. - hide_company (bool, optional): hide company name in final table. Defaults to True. - - Returns: - pd.DataFrame: numbers of reports and fiscal years. - """ - - # Compute data - data = compute_company_available_reports(df=df, company=company) - # Create the table - df = pd.DataFrame.from_dict(data=data, orient="index") + table = pd.DataFrame( + data=[company, n_reports, years_string], index=["Company", "Reports", "Fiscal year(s)"] + ) + # Hide the company name in the table if hide_company: - return df[1:].style.hide(axis="columns") + return table[1:].style.hide(axis="columns") - return df.style.hide(axis="columns") + return table.style.hide(axis="columns") # Viz 13 : Company key financials kpis -def company_key_financials_kpis( - df: pd.DataFrame, company: str, year: int = None -) -> dict: +def company_key_financials_kpis(df: pd.DataFrame, company: str, year: int = None) -> dict: """Compute key financial KPIs for a company in a table. Args: @@ -498,7 +458,6 @@ def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.F # Compute percentage of revenue df["total_revenues_%"] = df["total_revenues"] / df["total_revenues"].sum() - # Sort jurisdictions by percentage of total revenues df = df.sort_values(by="total_revenues_%") @@ -542,10 +501,8 @@ def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.F # Viz 15 : company’s % pre-tax profit and % employees by jurisdiction -def pretax_profit_and_employees_rank( - df: pd.DataFrame, company: str, year: int -) -> go.Figure: - """Compute and plot jurisdictions percentage of profit before tax and percentage of employees then rank by +def pretax_profit_and_employees_rank(df: pd.DataFrame, company: str, year: int) -> go.Figure: + """Compute and plot jurisdictions percentage of profit before tax and percentage of employees then rank by percentage of profit. Args: @@ -554,7 +511,7 @@ def pretax_profit_and_employees_rank( year (int): Fiscal year. Returns: - go.Figure:: rank of jurisdictions with percentage of profit before and percentage of employees in a Plotly + go.Figure:: rank of jurisdictions with percentage of profit before and percentage of employees in a Plotly figure. """ @@ -584,7 +541,7 @@ def pretax_profit_and_employees_rank( barmode="group", orientation="h", text_auto=".1%", - color_discrete_sequence=COLOR_SEQUENCE + color_discrete_sequence=COLOR_SEQUENCE, ) # Set figure height (min. 640) depending on the number of jurisdictions @@ -661,7 +618,7 @@ def pretax_profit_and_profit_per_employee( Returns: go.Figure: Percentage of profit and profit/employee in a Plotly Figure. """ - + # Filter rows with selected company/year and subset with necessary features features = ["jur_name", "profit_before_tax", "employees", "jur_tax_haven"] df = df.loc[(df["mnc"] == company) & (df["year"] == year), features] @@ -701,11 +658,11 @@ def pretax_profit_and_profit_per_employee( autosize=True, height=360, xaxis=dict( - title="% profit", + title="Percentage of profit", tickformat=".0%", ), yaxis=dict( - title="Profit/employee", + title="Profit per employee", ), legend=dict( x=0.1, y=1.05, xanchor="center", yanchor="top", title=dict(text=""), orientation="h" @@ -727,10 +684,10 @@ def pretax_profit_and_profit_per_employee( # Viz 18 : breakdown of revenue between related party and unrelated party in TH vs domestic vs non TH -def compute_related_and_unrelated_revenues_breakdown( +def related_and_unrelated_revenues_breakdown( df: pd.DataFrame, company: str, year: int -) -> dict: - """Compute related and unrelated revenues in tax heaven, non tax heaven and +) -> go.Figure: + """Compute and plot related and unrelated revenues in tax heaven, non tax heaven and domestic jurisdictions. Args: @@ -739,7 +696,7 @@ def compute_related_and_unrelated_revenues_breakdown( year (int): fiscal year to filter the results with. Returns: - dict: revenues percentage for different type of jurisdictions. + go.Figure: related and unrelated revenues in a Plotly Figure. """ # Filter rows with selected company/year and subset with necessary features @@ -767,14 +724,12 @@ def compute_related_and_unrelated_revenues_breakdown( # Compute kpis in a new DataFrame data = pd.DataFrame() data["tax_haven"] = df.loc[ - df["jur_tax_haven"] == True, ["unrelated_revenues", "related_revenues"] + df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"] ].sum() data["non_tax_haven"] = df.loc[ - df["jur_tax_haven"] == False, ["unrelated_revenues", "related_revenues"] - ].sum() - data["domestic"] = df.loc[ - df["domestic"] == True, ["unrelated_revenues", "related_revenues"] + ~df["jur_tax_haven"], ["unrelated_revenues", "related_revenues"] ].sum() + data["domestic"] = df.loc[df["domestic"], ["unrelated_revenues", "related_revenues"]].sum() # Replace values with share (%) of 'unrelated/related revenues' data = data.div(data.sum(axis="rows"), axis="columns") @@ -782,43 +737,15 @@ def compute_related_and_unrelated_revenues_breakdown( # Rename indexes data = data.rename( index={ - "unrelated_revenues": "unrelated_revenues_percentage", - "related_revenues": "related_revenues_percentage", + "unrelated_revenues": "Unrelated revenues", + "related_revenues": "Related revenues", } ) - # Convert DataFrame to dictionary - data = data.to_dict() - - return data - - -def display_related_and_unrelated_revenues_breakdown( - df: pd.DataFrame, company: str, year: int -) -> tuple[pd.DataFrame, go.Figure]: - """Display related and unrelated revenues in tax heaven, non tax heaven and - domestic jurisdictions. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - year (int): fiscal year to filter the results with. - """ - - # Compute data - data = compute_related_and_unrelated_revenues_breakdown(df=df, company=company, year=year) - - # Create DataFrame - df = pd.DataFrame.from_dict(data, orient="index") - - # Rename columns and indexes - df.columns = df.columns.str.replace("_", " ").str.capitalize() - df.index = df.index.str.replace("_", " ").str.capitalize() - # Create figure fig = px.bar( df, - x=["Unrelated revenues percentage", "Related revenues percentage"], + x=["Unrelated revenues", "Related revenues"], y=df.index, orientation="h", text_auto=".0%", @@ -826,7 +753,7 @@ def display_related_and_unrelated_revenues_breakdown( # Update layout settings fig.update_layout( - title="Breakdown of revenue", + title=None, xaxis=dict(title=None, tickformat=".0%"), yaxis_title=None, legend=dict(title=dict(text=""), orientation="h"), @@ -836,7 +763,7 @@ def display_related_and_unrelated_revenues_breakdown( ) # Define position of text values - for col in ["Unrelated revenues percentage", "Related revenues percentage"]: + for col in ["Unrelated revenues", "Related revenues"]: values_positions = ["outside" if value <= 0.05 else "inside" for value in df[col]] fig.update_traces(textangle=0, textposition=values_positions, selector=dict(name=col)) @@ -852,8 +779,7 @@ def display_related_and_unrelated_revenues_breakdown( font=dict(size=13), ) - # fig.show() - return pd.DataFrame.from_dict(data, orient="index"), go.Figure(fig) + return go.Figure(fig) # Viz 19 : what are the tax havens being used by the company @@ -942,7 +868,7 @@ def company_table(df_selected_company): # Viz 21 : evolution of tax havens use over time : % profit vs % employees in TH over time -def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: +def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure: """Compute the evolution of tax havens use by company over time. Args: @@ -950,7 +876,7 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: company (str): Company name Returns: - dict: tax havens percentage of profits and employees for each year. + go.Figure: tax havens use evolution in a Plotly Figure. """ # Filter rows with selected company and subset with necessary features @@ -985,26 +911,6 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: df["tax_havens_profit_%"] = df["profit_before_tax"] / df["total_profit"] df["tax_havens_employees_%"] = df["employees"] / df["total_employees"] - # Convert necessary data to dictionnary - data = df[["year", "tax_havens_profit_%", "tax_havens_employees_%"]].to_dict() - - return data - - -def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): - """Display the evolution of tax havens use by company over time. - - Args: - df (pd.DataFrame): CbCRs database. - company (str): Company name - """ - - # Compute data - data = compute_tax_havens_use_evolution(df=df, company=company) - - # Create DataFrame - df = pd.DataFrame.from_dict(data) - # Rename columns df = df.rename( columns={ @@ -1034,7 +940,6 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): height=480, ) - # fig.show() return go.Figure(fig) @@ -1044,7 +949,7 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): # Viz 24 : mnc tracked def mnc_tracked(df: pd.DataFrame) -> go.Figure: - """"Compute and plot the list of company name in a word cloud where the size of the font depends of the number + """Compute and plot the list of company name in a word cloud where the size of the font depends of the number of reports available. Args: @@ -1053,7 +958,7 @@ def mnc_tracked(df: pd.DataFrame) -> go.Figure: Returns: go.Figure: word cloud with company name in a Plotly figure. """ - + # Create dictionnary with company name as key and the number of reports as value data = df.groupby("mnc")["year"].nunique().to_dict() @@ -1380,9 +1285,7 @@ def transparency_score_over_time(df: pd.DataFrame, company: str): fig.show() -def transparency_scores_over_time_details( - df: pd.DataFrame, company: str -) -> pd.DataFrame: +def transparency_scores_over_time_details(df: pd.DataFrame, company: str) -> pd.DataFrame: """Compute all geographic, completeness and general transparency scores over time for a specific company in a table. Args: From d48ca3456fa7beae11d4a13c39de9e9d0cf8446c Mon Sep 17 00:00:00 2001 From: anquetos Date: Fri, 21 Jun 2024 10:05:59 +0200 Subject: [PATCH 5/6] Removed remaining titles in plots. --- app/algo.py | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/app/algo.py b/app/algo.py index 4badf68..067c321 100644 --- a/app/algo.py +++ b/app/algo.py @@ -94,7 +94,7 @@ def number_of_tracked_reports_over_time( fig.update_layout( autosize=True, height=360, - font_family="Roboto", + font_family="Roboto, sans-serif", title=None, xaxis=dict(title=None, tickvals=data["year"].unique()), yaxis=dict( @@ -176,20 +176,12 @@ def breakdown_of_reports_by_sector(df: pd.DataFrame) -> go.Figure: y="sector", x="percent", orientation="h", # Horizontal orientation - title="Breakdown of Reports by Sector (All Years)", labels={"percent": "Percentage of Companies (%)", "sector": "Sector"}, text="percent", # Show the percentage as text label - hover_data={"unique_company_count": True, "percent": ":.2f%"}, - # Add tooltip for count and rounded percentage + hover_data={"unique_company_count": True, "percent": ":.2f%"}, # Add tooltip for count and rounded percentage ) - # Update layout to display the title above the chart - fig.update_layout( - title="Breakdown of Reports by Sector", - title_x=0.5, - title_y=0.9, # Adjust position - title_font_size=20, - ) # Adjust font size + fig.update_layout(title=None) return go.Figure(fig) @@ -230,7 +222,6 @@ def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure: y="upe_name", x="percent", orientation="h", # Horizontal orientation - title="Breakdown of Reports by HQ Country over Time", labels={"percent": "Percentage of Companies (%)", "upe_name": "HQ Country"}, text="percent", # Show the percentage as text label hover_data={"unique_company_count": True, "percent": ":.2f%"}, @@ -238,12 +229,7 @@ def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure: ) # Update layout to display the title above the chart - fig.update_layout( - title="Breakdown of Reports by HQ Country over Time", - title_x=0.5, - title_y=0.95, # Adjust position - title_font_size=20, - ) # Adjust font size + fig.update_layout(title=None) return go.Figure(fig) @@ -278,14 +264,16 @@ def breakdown_of_reports_by_sector_over_time(df: pd.DataFrame) -> go.Figure: x="year", y="unique_company_count", color="Sectors", - title="Breakdown of Reports by Sector over Time", labels={"unique_company_count": "Number of Companies Reporting", "year": "Year"}, barmode="stack", category_orders={"Sectors": chart_order}, ) # Reverse the order of legend items - fig.update_layout(legend=dict(traceorder="reversed")) + fig.update_layout( + title=None, + legend=dict(traceorder="reversed") + ) # Adjusting the legend order and formatting the legend labels for i, trace in enumerate(fig.data): @@ -930,7 +918,7 @@ def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure: # Update layout settings fig.update_layout( - title="Tax havens use in profitables jurisdictions", + title=None, xaxis_title=None, yaxis_title=None, yaxis_tickformat=".0%", From 7703985ccf3761c421fdca8f8ab8cb5b50e15c1a Mon Sep 17 00:00:00 2001 From: anquetos Date: Fri, 21 Jun 2024 15:31:02 +0200 Subject: [PATCH 6/6] Added custom template (to be completed) for Plotly figures. --- app/algo.py | 130 +++++++++++++++++++++++----------------------------- 1 file changed, 58 insertions(+), 72 deletions(-) diff --git a/app/algo.py b/app/algo.py index 067c321..6e4df32 100644 --- a/app/algo.py +++ b/app/algo.py @@ -10,6 +10,19 @@ import humanize from wordcloud import WordCloud, get_single_color_func + +# Define custom template +custom_template = { + "layout": { + "autosize": True, + "plot_bgcolor": "white", + "font": {"family": "Roboto, sans-serif"}, + "title": None, + "margin": dict(l=0, r=0, b=0, t=0), + } +} + + # Define color sequence for plots COLOR_SEQUENCE = ["#D9D9D9", "#1E2E5C"] @@ -84,26 +97,18 @@ def number_of_tracked_reports_over_time( # Create figure fig = px.bar( - data, x="year", y="mnc", color_discrete_sequence=COLOR_SEQUENCE, text_auto=True + data, x="year", y="mnc", text_auto=True, color_discrete_sequence=COLOR_SEQUENCE ) # Force position and color of bar values fig.update_traces(textposition="outside", textfont=dict(color="black")) + # Define axes settings + fig.update_xaxes(title=None, tickvals=data["year"].unique()) + fig.update_yaxes(title=None, visible=False, range=[0, data["mnc"].max() * 1.1]) + # Update layout settings - fig.update_layout( - autosize=True, - height=360, - font_family="Roboto, sans-serif", - title=None, - xaxis=dict(title=None, tickvals=data["year"].unique()), - yaxis=dict( - title=None, - visible=False, - ), - plot_bgcolor="white", - margin=dict(l=0, r=0, b=0, t=0), - ) + fig.update_layout(template=custom_template, height=360) # Define style of hover on bars fig.update_traces( @@ -178,10 +183,14 @@ def breakdown_of_reports_by_sector(df: pd.DataFrame) -> go.Figure: orientation="h", # Horizontal orientation labels={"percent": "Percentage of Companies (%)", "sector": "Sector"}, text="percent", # Show the percentage as text label - hover_data={"unique_company_count": True, "percent": ":.2f%"}, # Add tooltip for count and rounded percentage + hover_data={ + "unique_company_count": True, + "percent": ":.2f%", + }, # Add tooltip for count and rounded percentage ) - fig.update_layout(title=None) + # Update layout settings + fig.update_layout(template=custom_template) return go.Figure(fig) @@ -228,8 +237,8 @@ def breakdown_of_reports_by_hq_country(df: pd.DataFrame) -> go.Figure: # Add tooltip for count and rounded percentage ) - # Update layout to display the title above the chart - fig.update_layout(title=None) + # Update layout settings + fig.update_layout(template=custom_template) return go.Figure(fig) @@ -269,11 +278,8 @@ def breakdown_of_reports_by_sector_over_time(df: pd.DataFrame) -> go.Figure: category_orders={"Sectors": chart_order}, ) - # Reverse the order of legend items - fig.update_layout( - title=None, - legend=dict(traceorder="reversed") - ) + # Update layout settings + fig.update_layout(template=custom_template, legend=dict(traceorder="reversed")) # Adjusting the legend order and formatting the legend labels for i, trace in enumerate(fig.data): @@ -455,22 +461,19 @@ def top_jurisdictions_revenue(df: pd.DataFrame, company: str, year: int) -> go.F x="total_revenues_%", y="jur_name", orientation="h", - color_discrete_sequence=COLOR_SEQUENCE, text_auto=".1%", + color_discrete_sequence=COLOR_SEQUENCE, ) # Set figure height (min. 480) depending on the number of jurisdictions fig_height = max(480, (48 * len(df["jur_name"]))) + # Update axis layout + fig.update_xaxes(title="Percentage of total revenue", tickformat=".0%") + fig.update_yaxes(title=None) + # Update layout settings - fig.update_layout( - font_family="Roboto", - xaxis=dict(title="Percentage of total revenue", tickformat=".0%"), - yaxis_title=None, - plot_bgcolor="white", - height=fig_height, - margin=dict(l=0, r=0, t=0, b=0), - ) + fig.update_layout(template=custom_template, height=fig_height) # Define position of text values values_positions = [ @@ -541,18 +544,17 @@ def pretax_profit_and_employees_rank(df: pd.DataFrame, company: str, year: int) else: max_x_value = 1 + # Update axis layout + fig.update_xaxes(title=None, tickformat=".0%", range=[0, max_x_value]) + fig.update_yaxes(title=None) + # Update layout settings fig.update_layout( - font_family="Roboto", - title=None, - xaxis=dict(title=None, tickformat=".0%", range=[0, max_x_value]), - yaxis_title=None, legend=dict( - x=0.1, y=1.05, xanchor="center", yanchor="top", title=dict(text=""), orientation="h" + orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="") ), - plot_bgcolor="white", + template=custom_template, height=fig_height, - margin=dict(l=0, r=0, t=10, b=0), ) # Add annotations for NaN values where there should have been a bar @@ -639,24 +641,17 @@ def pretax_profit_and_profit_per_employee( custom_data=["jur_name"], ) + # Update axis layout + fig.update_xaxes(title="Percentage of profit", tickformat=".0%") + fig.update_yaxes(title="Profit per employee") + # Update layout settings fig.update_layout( - title=None, - font_family="Roboto", - autosize=True, - height=360, - xaxis=dict( - title="Percentage of profit", - tickformat=".0%", - ), - yaxis=dict( - title="Profit per employee", - ), legend=dict( - x=0.1, y=1.05, xanchor="center", yanchor="top", title=dict(text=""), orientation="h" + orientation="h", yanchor="bottom", y=1.01, xanchor="left", x=0, title=dict(text="") ), - plot_bgcolor="white", - margin=dict(l=0, r=0, t=0, b=0), + template=custom_template, + height=380, ) # Define hover @@ -739,15 +734,13 @@ def related_and_unrelated_revenues_breakdown( text_auto=".0%", ) + # Update axis layout + fig.update_xaxes(title=None, tickformat=".0%") + fig.update_yaxes(title=None) + # Update layout settings fig.update_layout( - title=None, - xaxis=dict(title=None, tickformat=".0%"), - yaxis_title=None, - legend=dict(title=dict(text=""), orientation="h"), - plot_bgcolor="white", - width=800, - height=480, + legend=dict(title=dict(text=""), orientation="h"), template=custom_template ) # Define position of text values @@ -915,17 +908,13 @@ def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure: barmode="group", text_auto=".1%", ) + # Update axis layout + fig.update_xaxes(title=None) + fig.update_yaxes(title=None, tickformat=".0%") # Update layout settings fig.update_layout( - title=None, - xaxis_title=None, - yaxis_title=None, - yaxis_tickformat=".0%", - legend=dict(title=dict(text=""), orientation="h"), - plot_bgcolor="white", - width=800, - height=480, + legend=dict(title=dict(text=""), orientation="h"), template=custom_template ) return go.Figure(fig) @@ -1250,7 +1239,7 @@ def transparency_score_over_time(df: pd.DataFrame, company: str): # Update layout settings fig.update_layout( - title="Transparency score over time", + template=custom_template, xaxis=dict(title=None, tickvals=df["year"].unique()), yaxis=dict( title=None, @@ -1262,9 +1251,6 @@ def transparency_score_over_time(df: pd.DataFrame, company: str): tickvals=[0, 25, 50, 75, 100], ticktext=[0, "", "", "", 100], ), - plot_bgcolor="white", - width=800, - height=480, ) # Force position and color of bar values