From 4ad847afad4835e3fc303750b5d9ea2ad0de66e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fix Date: Tue, 11 Jun 2024 14:16:46 +0200 Subject: [PATCH] resync correct files for production --- app/algo.py | 171 ++++++++++--------------- app/main.py | 41 +++--- app/pages/company/company.py | 236 ++++++++++++++++------------------- 3 files changed, 191 insertions(+), 257 deletions(-) diff --git a/app/algo.py b/app/algo.py index e45090b..6f9354c 100644 --- a/app/algo.py +++ b/app/algo.py @@ -10,8 +10,6 @@ # TODO add viz comment # Viz 1 - - - def number_of_tracked_reports(df): number_of_tracked_reports = len(df.groupby(["year", "mnc"])["mnc"]) return number_of_tracked_reports @@ -157,8 +155,7 @@ def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_compan # pretax_profit_col_name = 'profit_before_tax' profit_col_name = '' employee_col_name = 'employees' - df_selected_company[profit_col_name] / \ - df_selected_company[employee_col_name] + df_selected_company[profit_col_name] / df_selected_company[employee_col_name] # Viz 19 @@ -173,15 +170,15 @@ def tax_haven_used_by_company(df_selected_company): company_upe_code = df_selected_company['upe_code'].unique()[0] pc_list = ['employees', 'profit_before_tax', 'related_revenues'] # grouper = df_selected_company.groupby('jur_name') - + df = pd.DataFrame(df_selected_company) - + df_domestic_company = df[df['jur_code'] == company_upe_code] df_selected_company_th = df[df['jur_tax_haven'] != 'not.TH'] df_selected_company_nth = df[df['jur_tax_haven'] == 'not.TH'] - + for col in pc_list: - + df.insert( len(df_selected_company.columns), col + '_domestic_sum', @@ -218,8 +215,7 @@ def tax_haven_used_by_company(df_selected_company): ) df_selected_company_th_agg = df_selected_company_th_agg.reset_index() df_selected_company_th_agg['profit per employee'] = \ - df_selected_company_th_agg['profit_before_tax'] / \ - df_selected_company_th_agg['employees'] + df_selected_company_th_agg['profit_before_tax'] / df_selected_company_th_agg['employees'] df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace( [np.inf, -np.inf], None) @@ -230,9 +226,8 @@ def tax_haven_used_by_company(df_selected_company): # complete table table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens) def company_table(df_selected_company): # company_upe_code = df_selected_company['upe_code'].unique()[0] - pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', - 'related_revenues', 'total_revenues', 'tax_paid'] - + pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', 'related_revenues', 'total_revenues', 'tax_paid'] + df = pd.DataFrame(df_selected_company) for col in pc_list: if col + '_sum' not in df.columns: @@ -263,13 +258,12 @@ def company_table(df_selected_company): # Viz 4 - Breakdown of reports by sector (pie chart) def breakdown_of_reports_by_sector(df): - # Dataframe called df + #Dataframe called df df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index( name='unique_company_count') # Aggregate the counts of unique companies across all years for each sector - df_reports_per_sector = df_reports_per_sector_year.groupby( - 'sector')['unique_company_count'].sum().reset_index() + df_reports_per_sector = df_reports_per_sector_year.groupby('sector')['unique_company_count'].sum().reset_index() # Calculate the total count of unique companies across all sectors total_companies = df_reports_per_sector['unique_company_count'].sum() @@ -279,8 +273,7 @@ def breakdown_of_reports_by_sector(df): 2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_sector = df_reports_per_sector.sort_values( - by='unique_company_count', ascending=True) + df_reports_per_sector = df_reports_per_sector.sort_values(by='unique_company_count', ascending=True) return df_reports_per_sector @@ -290,8 +283,7 @@ def breakdown_of_reports_by_sector_viz(df_reports_per_sector): fig = px.bar(df_reports_per_sector, y='sector', x='percent', orientation='h', # Horizontal orientation title='Breakdown of Reports by Sector (All Years)', - labels={ - 'percent': 'Percentage of Companies (%)', 'sector': 'Sector'}, + labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'}, text='percent', # Show the percentage as text label hover_data={'unique_company_count': True, 'percent': ':.2f%'}, # Add tooltip for count and rounded percentage @@ -313,19 +305,17 @@ def breakdown_of_reports_by_hq_country(df): name='unique_company_count') # Aggregate the counts of unique companies across all years for each HQ country - df_reports_per_country = df_reports_per_country_year.groupby( - 'upe_name')['unique_company_count'].sum().reset_index() + df_reports_per_country = df_reports_per_country_year.groupby('upe_name')['unique_company_count'].sum().reset_index() # Calculate the total count of unique companies across all HQ countries total_companies = df_reports_per_country['unique_company_count'].sum() # Calculate the percentage of each HQ country's count relative to the total count and round to 2 decimals df_reports_per_country['percent'] = ( - (df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2) + (df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2) # Sort the DataFrame by the count of unique companies in ascending order - df_reports_per_country = df_reports_per_country.sort_values( - by='unique_company_count', ascending=True) + df_reports_per_country = df_reports_per_country.sort_values(by='unique_company_count', ascending=True) return df_reports_per_country @@ -335,8 +325,7 @@ def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): fig = px.bar(df_reports_per_country, y='upe_name', x='percent', orientation='h', # Horizontal orientation title='Breakdown of Reports by HQ Country over Time', - labels={ - 'percent': 'Percentage of Companies (%)', 'upe_name': 'HQ Country'}, + labels={'percent': 'Percentage of Companies (%)', 'upe_name': 'HQ Country'}, text='percent', # Show the percentage as text label hover_data={'unique_company_count': True, 'percent': ':.2f%'}, # Add tooltip for count and rounded percentage @@ -352,7 +341,7 @@ def breakdown_of_reports_by_hq_country_viz(df_reports_per_country): return go.Figure(fig) -# Viz 6 - Breakdown of reports by sector over time (bar chart) +## Viz 6 - Breakdown of reports by sector over time (bar chart) def breakdown_of_reports_by_sector_over_time(df): @@ -363,16 +352,14 @@ def breakdown_of_reports_by_sector_over_time(df): top_10_sectors = df['sector'].value_counts().nlargest(10).index.tolist() # Step 2: Group all other sectors as "Others" - df['Sectors'] = df['sector'].apply( - lambda x: x if x in top_10_sectors else 'Others') + df['Sectors'] = df['sector'].apply(lambda x: x if x in top_10_sectors else 'Others') # Step 3: Group the DataFrame by 'year', 'Sectors', and count the number of unique companies for each year and sector df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index( name='unique_company_count') # Sort sectors alphabetically - df_reports_per_year_sector = df_reports_per_year_sector.sort_values( - by='Sectors', ascending=False) + df_reports_per_year_sector = df_reports_per_year_sector.sort_values(by='Sectors', ascending=False) return df_reports_per_year_sector, top_10_sectors @@ -385,8 +372,7 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top # Plotting the bar chart using Plotly Express fig = px.bar(df_reports_per_year_sector, x='year', y='unique_company_count', color='Sectors', title='Breakdown of Reports by Sector over Time', - labels={ - 'unique_company_count': 'Number of Companies Reporting', 'year': 'Year'}, + labels={'unique_company_count': 'Number of Companies Reporting', 'year': 'Year'}, barmode='stack', category_orders={'Sectors': chart_order}) @@ -405,19 +391,19 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top return go.Figure(fig) -# Viz 7 - Breakdown of reports by HQ country over time (bar chart) +## Viz 7 - Breakdown of reports by HQ country over time (bar chart) # TODO add code -# Viz 8 - Breakdown of MNC by sector (pie chart - changed to bar chart for more visibility) +## Viz 8 - Breakdown of MNC by sector (pie chart - changed to bar chart for more visibility) # TODO add code -# Viz 9 - Breakdown of MNC by HQ country (pie chart - changed to bar chart for more visibility) +## Viz 9 - Breakdown of MNC by HQ country (pie chart - changed to bar chart for more visibility) # TODO add code -# Viz 10/11 - Breakdown of MNC by sector +## Viz 10/11 - Breakdown of MNC by sector # TODO add code -# Viz 11 - Breakdown of MNC by HQ country +## Viz 11 - Breakdown of MNC by HQ country # TODO add code # Viz 12 - available reports by company @@ -550,8 +536,7 @@ def display_company_key_financials_kpis( """ # Compute data - data = compute_company_key_financials_kpis( - df=df, company=company, year=year) + data = compute_company_key_financials_kpis(df=df, company=company, year=year) # Create the table df = pd.DataFrame.from_dict(data) @@ -703,8 +688,7 @@ def compute_pretax_profit_and_employees_rank( df = df.sort_values(by='profit_before_tax').reset_index(drop=True) # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / \ - df['profit_before_tax'].sum() + df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() df['employees_%'] = df['employees'] / df['employees'].sum() df = df.drop(columns=['profit_before_tax', 'employees']) @@ -725,8 +709,7 @@ def display_pretax_profit_and_employees_rank( """ # Compute data - df = compute_pretax_profit_and_employees_rank( - df=df, company=company, year=year) + df = compute_pretax_profit_and_employees_rank(df=df, company=company, year=year) # Create DataFrame # df = pd.DataFrame(data) @@ -756,8 +739,7 @@ def display_pretax_profit_and_employees_rank( # Set maximum value for x axis if not df[['% profit', '% employees']].isna().all().all(): - max_x_value = max(df[['% profit', '% employees'] - ].max(axis='columns')) + 0.1 + max_x_value = max(df[['% profit', '% employees']].max(axis='columns')) + 0.1 else: max_x_value = 1 @@ -810,8 +792,7 @@ def display_pretax_profit_and_employees_rank( # Loop through each bar trace and hide the text if the value is NaN for trace in fig.data: values = df[trace.name] - text_position = ['outside' if not np.isnan( - value) else 'none' for value in values] + text_position = ['outside' if not np.isnan(value) else 'none' for value in values] trace.textposition = text_position if trace.name == '% employees': @@ -839,8 +820,7 @@ def compute_pretax_profit_and_profit_per_employee( df.loc[df['employees'] == 0, 'employees'] = 1 # Calculate percentages - df['profit_before_tax_%'] = df['profit_before_tax'] / \ - df['profit_before_tax'].sum() + df['profit_before_tax_%'] = df['profit_before_tax'] / df['profit_before_tax'].sum() df['profit_per_employee'] = df['profit_before_tax'] / df['employees'] df = df.drop(columns=['profit_before_tax', 'employees']) @@ -852,16 +832,14 @@ def compute_pretax_profit_and_profit_per_employee( def display_pretax_profit_and_profit_per_employee(df: pd.DataFrame, company: str, year: int) -> go.Figure: # Compute data - df = compute_pretax_profit_and_profit_per_employee( - df=df, company=company, year=year) - + df = compute_pretax_profit_and_profit_per_employee(df=df, company=company, year=year) + # Create DataFrame # df = pd.DataFrame(data) # Replace bool values of Tax haven by string values - df['jur_tax_haven'] = df['jur_tax_haven'].map( - {True: 'Tax haven', False: 'Non tax haven'}) - + df['jur_tax_haven'] = df['jur_tax_haven'].map({True: 'Tax haven', False: 'Non tax haven'}) + # Create figure fig = px.scatter( df, @@ -896,13 +874,13 @@ def display_pretax_profit_and_profit_per_employee(df: pd.DataFrame, company: str plot_bgcolor='white', margin=dict(l=0, r=0, t=0, b=0) ) - + + # Define hover fig.update_traces( - hovertemplate=f"{ - company} reports %{{x:.1%}} of profit and %{{y:.3s}}€ profits per employee in %{{customdata[0]}}" + hovertemplate=f"{company} reports %{{x:.1%}} of profit and %{{y:.3s}}€ profits per employee in %{{customdata[0]}}" ) - + return go.Figure(fig) @@ -936,17 +914,13 @@ def compute_related_and_unrelated_revenues_breakdown( df['total_revenues'] = df['unrelated_revenues'] + df['related_revenues'] # Create a column to check if 'jur_code' is the domestic country - df['domestic'] = df.apply( - lambda row: row['jur_code'] == row['upe_code'], axis='columns') + df['domestic'] = df.apply(lambda row: row['jur_code'] == row['upe_code'], axis='columns') # Compute kpis in a new DataFrame data = pd.DataFrame() - data['tax_haven'] = df.loc[df['jur_tax_haven'] == True, - ['unrelated_revenues', 'related_revenues']].sum() - data['non_tax_haven'] = df.loc[df['jur_tax_haven'] == - False, ['unrelated_revenues', 'related_revenues']].sum() - data['domestic'] = df.loc[df['domestic'] == True, [ - 'unrelated_revenues', 'related_revenues']].sum() + data['tax_haven'] = df.loc[df['jur_tax_haven'] == True, ['unrelated_revenues', 'related_revenues']].sum() + data['non_tax_haven'] = df.loc[df['jur_tax_haven'] == False, ['unrelated_revenues', 'related_revenues']].sum() + data['domestic'] = df.loc[df['domestic'] == True, ['unrelated_revenues', 'related_revenues']].sum() # Replace values with share (%) of 'unrelated/related revenues' data = data.div(data.sum(axis='rows'), axis='columns') @@ -974,8 +948,7 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: """ # Compute data - data = compute_related_and_unrelated_revenues_breakdown( - df=df, company=company, year=year) + data = compute_related_and_unrelated_revenues_breakdown(df=df, company=company, year=year) # Create DataFrame df = pd.DataFrame.from_dict(data, orient='index') @@ -1012,8 +985,7 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company: # Define position of text values for col in ['Unrelated revenues percentage', 'Related revenues percentage']: - values_positions = ['outside' if value <= - 0.05 else 'inside' for value in df[col]] + values_positions = ['outside' if value <= 0.05 else 'inside' for value in df[col]] fig.update_traces( textangle=0, @@ -1049,28 +1021,23 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: """ # Filter rows with selected company and subset with necessary features - features = ['jur_code', 'year', 'jur_tax_haven', - 'profit_before_tax', 'employees'] + features = ['jur_code', 'year', 'jur_tax_haven', 'profit_before_tax', 'employees'] df = df.loc[(df['mnc'] == company), features] # Keep jurisdictions with profitable or missing revenues - df = df.loc[(df['profit_before_tax'] >= 0) | - (df['profit_before_tax'].isna())] + df = df.loc[(df['profit_before_tax'] >= 0) | (df['profit_before_tax'].isna())] # For all sum calculations below : # - Result NA : all jurisdictions values were NA ; # - Result 0 : at least one jurisdiction was reported as 0. # Calculate total profit and employees by year and tax haven status - df = df.groupby(['year', 'jur_tax_haven'], as_index=False)[ - ['profit_before_tax', 'employees']].sum(min_count=1) + df = df.groupby(['year', 'jur_tax_haven'], as_index=False)[['profit_before_tax', 'employees']].sum(min_count=1) # Calculate total profits and employees for each year for year in df['year'].unique(): - df.loc[df['year'] == year, 'total_profit'] = df.loc[df['year'] - == year, 'profit_before_tax'].sum(min_count=1) - df.loc[df['year'] == year, 'total_employees'] = df.loc[df['year'] - == year, 'employees'].sum(min_count=1) + df.loc[df['year'] == year, 'total_profit'] = df.loc[df['year'] == year, 'profit_before_tax'].sum(min_count=1) + df.loc[df['year'] == year, 'total_employees'] = df.loc[df['year'] == year, 'employees'].sum(min_count=1) # Remove non tax haven jurisdictions df = df.loc[df['jur_tax_haven'] == True].reset_index() @@ -1080,8 +1047,7 @@ def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict: df['tax_havens_employees_%'] = df['employees'] / df['total_employees'] # Convert necessary data to dictionnary - data = df[['year', 'tax_havens_profit_%', - 'tax_havens_employees_%']].to_dict() + data = df[['year', 'tax_havens_profit_%', 'tax_havens_employees_%']].to_dict() return data @@ -1110,8 +1076,7 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str): fig = px.bar( df, x='year', - y=['Percentage of profits in tax havens', - 'Percentage of employees in tax havens'], + y=['Percentage of profits in tax havens', 'Percentage of employees in tax havens'], barmode='group', text_auto='.1%' ) @@ -1141,12 +1106,10 @@ def compute_number_of_tracked_mnc_available(df) -> dict: df_unique_mnc = df.drop_duplicates(subset=['year', 'mnc']) # Group the DataFrame by 'mnc' and count the number of reports for each MNC - df_reports_per_mnc = df_unique_mnc.groupby( - 'mnc').size().reset_index(name='report_count') + df_reports_per_mnc = df_unique_mnc.groupby('mnc').size().reset_index(name='report_count') # Convert the DataFrame to a dictionary where MNCs are keys and report counts are values - mnc_report_count = dict( - zip(df_reports_per_mnc['mnc'], df_reports_per_mnc['report_count'], strict=False)) + mnc_report_count = dict(zip(df_reports_per_mnc['mnc'], df_reports_per_mnc['report_count'], strict=False)) return mnc_report_count @@ -1222,8 +1185,7 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float df = df.dropna(axis='columns', how='all') # List financial columns left after deleting columns with only missing values - financial_columns_left = [ - col for col in df.columns if col in financial_columns] + financial_columns_left = [col for col in df.columns if col in financial_columns] # Geographic score = 0 if no financial columns left if not financial_columns_left: @@ -1235,8 +1197,8 @@ def compute_geographic_score(df: pd.DataFrame, company: str, year: int) -> float # Calculate percentage of each financial value where jurisdiction is 'OTHER' # Percentage = 1. Total of 'OTHER' row(s) / 2. Total of all rows other_percentage = ( - df.loc[df['jur_code'] == 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df['jur_code'] == 'OTHER', financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate geographic score @@ -1268,8 +1230,7 @@ def compute_completeness_score(df: pd.DataFrame, company: str, year: int) -> flo df = df.dropna(axis='columns', how='all') # List financial columns left after deleting columns with only missing values - financial_columns_left = [ - col for col in df.columns if col in financial_columns] + financial_columns_left = [col for col in df.columns if col in financial_columns] # Completeness score = 0 if no financial columns left if not financial_columns_left: @@ -1314,8 +1275,7 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo df = df.dropna(axis='columns', how='all') # List financial columns left after deleting columns with only missing values - financial_columns_left = [ - col for col in df.columns if col in financial_columns] + financial_columns_left = [col for col in df.columns if col in financial_columns] # Transparency score = 0 if no financial columns left if not financial_columns_left: @@ -1327,8 +1287,8 @@ def compute_transparency_score(df: pd.DataFrame, company: str, year: int) -> flo # Calculate percentage of each financial value where jurisdiction is not 'OTHER' # Percentage = 1. Total of not 'OTHER' row(s) / 2. Total of all rows not_other_percentage = ( - df.loc[df['jur_code'] != 'OTHER', financial_columns_left].sum() # 1 - / df[financial_columns_left].sum() # 2 + df.loc[df['jur_code'] != 'OTHER', financial_columns_left].sum() # 1 + / df[financial_columns_left].sum() # 2 ) # Calculate transparency score @@ -1358,12 +1318,9 @@ def compute_all_scores(df: pd.DataFrame, company: str) -> dict: # Calculate scores for each year and add them to the dictionary for year in years_list: # Calculate scores - geographic_score = compute_geographic_score( - df=df, company=company, year=year) - completeness_score = compute_completeness_score( - df=df, company=company, year=year) - transparency_score = compute_transparency_score( - df=df, company=company, year=year) + geographic_score = compute_geographic_score(df=df, company=company, year=year) + completeness_score = compute_completeness_score(df=df, company=company, year=year) + transparency_score = compute_transparency_score(df=df, company=company, year=year) data[year] = { 'mnc': company, diff --git a/app/main.py b/app/main.py index b2a5678..9630812 100644 --- a/app/main.py +++ b/app/main.py @@ -19,33 +19,28 @@ MAX_YEAR_OF_REPORTS = 2021 PATH_TO_DATA = f"{cfg.DATA}/data_final_dataviz.csv" -data: pd.DataFrame = None - +data:pd.DataFrame = None def on_init(state: State): # print('MAIN ON_INIT...') # print(f'MAIN STATE {get_state_id(state)}') - + # Init data init_data(state) # Call company on_init on_init_company(state) # Call company on_init - on_init_home(state) - - # print('MAIN ON_INIT...END') - -# Performance optimization - - + on_init_home(state) + + # print('MAIN ON_INIT...END') + +# Performance optimization def init_data(state: State): - df = pd.read_csv(f"{PATH_TO_DATA}", sep=",", - low_memory=False, encoding='utf-8') + df = pd.read_csv(f"{PATH_TO_DATA}", sep=",", low_memory=False, encoding='utf-8') # Filter dataset with the maximum year to take in account df = df.loc[df["year"] <= MAX_YEAR_OF_REPORTS].reset_index() - state.data = df - - + state.data = df + # Add pages pages = { "/": root, @@ -58,8 +53,6 @@ def init_data(state: State): } # Functions used to navigate between pages - - def goto_home(state): navigate(state, "Home") @@ -99,9 +92,11 @@ def goto_download(state): "font_family": "Manrope" } -if __name__ == '__main__': - tp.Core().run() - web_app = gui_multi_pages.run( + +if __name__ == "__main__": + ## DEV + # Start the local flask server + gui_multi_pages.run( dark_mode=False, stylekit=stylekit, title=f"{APP_TITLE}", @@ -110,7 +105,7 @@ def goto_download(state): watermark="LOCAL DEVELOPMENT", ) else: - # PRODUCTION + ## PRODUCTION # Start the app used by uwsgi server web_app = gui_multi_pages.run( dark_mode=False, @@ -121,7 +116,9 @@ def goto_download(state): debug=False, # Remove watermark "Taipy inside" watermark="", - # IMPORTANT: Set the async_mode to gevent_uwsgi to use uwsgi + # IMPORTANT: Set the async_mode to gevent_uwsgi to use uwsgi # See https://python-socketio.readthedocs.io/en/latest/server.html#uwsgi async_mode='gevent_uwsgi' ) + + diff --git a/app/pages/company/company.py b/app/pages/company/company.py index d05c2bc..294fdd5 100644 --- a/app/pages/company/company.py +++ b/app/pages/company/company.py @@ -1,5 +1,4 @@ -from taipy.gui import State, Markdown, get_state_id - +from taipy.gui import State, Markdown, get_state_id import numpy as np import pandas as pd from app import algo @@ -12,37 +11,37 @@ # Init bindings (used in md file) selected_company = DEFAULT_COMPANY -selector_company: list[str] = [] -selected_year: str = None -selector_year: list[str] = [] -company_sector: str = None -company_upe_name: str = "" +selector_company:list[str] = [] +selected_year:str = None +selector_year:list[str] = [] +company_sector:str = None +company_upe_name:str = "" -df_selected_company: pd.DataFrame = None -df_count_company: pd.DataFrame = None +df_selected_company:pd.DataFrame = None +df_count_company:pd.DataFrame = None # Viz store map[viz_id,viz_dict] # Important for taipy bindings # Use Viz.init on each page with set of viz_id -viz: dict[str, dict] = Viz.init( +viz:dict[str,dict] = Viz.init( ( "company_sector", "company_upe_name", "company_nb_reports", "company_transparency_score", - + "fin_transparency_score", "fin_transparency_score_over_time_details", - + "fin_key_financials_kpis", "fin_jurisdictions_top_revenue", - + "fin_pretax_profit_and_employees_rank", - "fin_pretax_profit_and_profit_per_employee", + "fin_pretax_profit_and_profit_per_employee", ) -) - +) + # Initialize state (Taipy callback function) # Called by main.py/on_init def on_init(state: State): @@ -51,187 +50,169 @@ def on_init(state: State): init_state(state) - # print('COMPANY ON INIT...END') + # print('COMPANY ON INIT...END') def init_state(state: State): with state as s: # Path to image - s.company_image_path = company_image_path - + s.company_image_path = company_image_path + s.selected_company = selected_company # print(f'company state selected_company:{s.selected_company}') - + # Performance: Done once in main.py # s.data = data # print(f'company state data:{s.data.head()}') - - # List companies to populate selector + + # List companies to populate selector s.selector_company = list(np.sort(s.data["mnc"].astype(str).unique())) # print(f'company state selector_company:{s.selector_company}') - + s.viz = viz - + update_full(state) - def update_full(state: State): with state as s: - update_state(s) - update_viz(s) - - + update_state(s) + update_viz(s) + def update_state(state: State): # Create a filtered DataFrame with selected company - state.df_selected_company = state.data[state.data["mnc"] - == state.selected_company] + state.df_selected_company = state.data[state.data["mnc"] == state.selected_company] # print(f'company state df_selected_company:{state.df_selected_company.head()}') - + # List years to populate selector and initialise selected year - state.selector_year = state.df_selected_company["year"].unique().astype( - str).tolist() + state.selector_year = state.df_selected_company["year"].unique().astype(str).tolist() # print(f'company state selector_year:{state.selector_year}') state.selected_year = max(state.selector_year) # print(f'company state selected_year:{state.selected_year}') # Calculate number of reports for all companies - state.df_count_company = algo.number_of_tracked_reports_over_time_company( - state.df_selected_company) + state.df_count_company = algo.number_of_tracked_reports_over_time_company(state.df_selected_company) # print(f'company state df_count_company:{state.df_count_company.head()}') # Calculate sector, upe_code for selected company - state.company_sector = list( - state.df_selected_company["sector"].unique())[0] + state.company_sector = list(state.df_selected_company["sector"].unique())[0] # print(f'company state company_sector:{state.company_sector}') - state.company_upe_name = state.df_selected_company["upe_name"].unique()[0] + state.company_upe_name = state.df_selected_company["upe_name"].unique()[0] # print(f'company state company_upe_name:{state.company_upe_name}') - + def update_viz(state: State): update_viz_company(state) update_viz_year(state) - - + + def update_viz_company(state: State): # print(f'update viz company : {state.selected_company}') - - id = "company_sector" - state.viz[id] = Viz(id=id, - state=state, - data=state.company_sector, - title="Sector" - ).to_state() + + id ="company_sector" + state.viz[id] = Viz(id=id, + state=state, + data=state.company_sector, + title="Sector" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - - id = "company_upe_name" - state.viz[id] = Viz(id=id, - state=state, - data=state.company_upe_name, + + id ="company_upe_name" + state.viz[id] = Viz(id=id, + state=state, + data=state.company_upe_name, title="Headquarter" - ).to_state() + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - - id = "company_nb_reports" + + id="company_nb_reports" state.viz[id] = Viz(id=id, - state=state, - data=algo.number_of_tracked_reports_company( - state.df_selected_company), - title="Number of reports" - ).to_state() + state=state, + data = algo.number_of_tracked_reports_company(state.df_selected_company), + title = "Number of reports" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - id = "company_transparency_score" + id="company_transparency_score" state.viz[id] = Viz(id=id, state=state, - data=algo.display_transparency_score( - state.data, state.selected_company), + data=algo.display_transparency_score(state.data, state.selected_company), title="Transparency Score", sub_title="average over all reports" - ).to_state() + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - - # Ex Viz 6 + + # Ex Viz 6 # id="company_transparency_more" - # state.viz[id] = Viz(id=id, - # state=state, + # state.viz[id] = Viz(id=id, + # state=state, # data=state.df_selected_company, # title="More on transparency", # sub_title="" # ).to_state() # # print(f'update viz id:{id} title:{state.viz[id].title}') - - + + def update_viz_year(state: State): # # same order as previous code # print(f'update viz financial : {state.selected_company}') - + # Transparency - id = "fin_transparency_score" - state.viz[id] = Viz(id=id, - state=state, - data=algo.display_transparency_score( - state.data, state.selected_company, int(state.selected_year)), + id ="fin_transparency_score" + state.viz[id] = Viz(id=id, + state=state, + data=algo.display_transparency_score(state.data, state.selected_company, int(state.selected_year)), title="Transparency Score", - sub_title=f"selected fiscal year : { - state.selected_year}" - ).to_state() + sub_title=f"selected fiscal year : {state.selected_year}" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - - id = "fin_transparency_score_over_time_details" + + id ="fin_transparency_score_over_time_details" state.viz[id] = Viz(id=id, - state=state, - data=algo.display_transparency_score_over_time_details( - state.data, state.selected_company), + state=state, + data=algo.display_transparency_score_over_time_details(state.data, state.selected_company), title="Transparency score over time ", - ).to_state() + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - + # Profile - id = "fin_key_financials_kpis" + id ="fin_key_financials_kpis" state.viz[id] = Viz(id=id, state=state, - data=algo.display_company_key_financials_kpis( - state.data, state.selected_company, int(state.selected_year)), + data=algo.display_company_key_financials_kpis(state.data, state.selected_company, int(state.selected_year)), title="Key metrics", - sub_title=f"selected fiscal year : { - state.selected_year}" - ).to_state() + sub_title=f"selected fiscal year : {state.selected_year}" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - + id = "fin_jurisdictions_top_revenue" state.viz[id] = Viz(id=id, state=state, - fig=algo.display_jurisdictions_top_revenue( - state.data, state.selected_company, int(state.selected_year)), + fig=algo.display_jurisdictions_top_revenue(state.data, state.selected_company, int(state.selected_year)), title="Distribution of revenues across countries", - sub_title=f"selected fiscal year : { - state.selected_year}" - ).to_state() - # print(f'update viz id:{id} title:{state.viz[id].title}') + sub_title=f"selected fiscal year : {state.selected_year}" + ).to_state() + # print(f'update viz id:{id} title:{state.viz[id].title}') # Distribution id = "fin_pretax_profit_and_employees_rank" state.viz[id] = Viz(id=id, state=state, - fig=algo.display_pretax_profit_and_employees_rank( - state.data, state.selected_company, int(state.selected_year)), + fig=algo.display_pretax_profit_and_employees_rank(state.data, state.selected_company, int(state.selected_year)), title="% profit and employees by country", - sub_title=f"selected fiscal year : { - state.selected_year}" - ).to_state() + sub_title=f"selected fiscal year : {state.selected_year}" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - + + id = "fin_pretax_profit_and_profit_per_employee" state.viz[id] = Viz(id=id, state=state, - fig=algo.display_pretax_profit_and_profit_per_employee( - state.data, state.selected_company, int(state.selected_year)), + fig=algo.display_pretax_profit_and_profit_per_employee(state.data, state.selected_company, int(state.selected_year)), title="% profit and profit / employee by country", - sub_title=f"selected fiscal year : { - state.selected_year}" - ).to_state() + sub_title=f"selected fiscal year : {state.selected_year}" + ).to_state() # print(f'update viz id:{id} title:{state.viz[id].title}') - + # Ex Viz 17 # id = "fin_profit_and_employee_breakdown" # state.viz[id] = Viz(id=id, @@ -240,20 +221,21 @@ def update_viz_year(state: State): # title="% profits, % employees and profit / employee", # sub_title="domestic vs. havens vs. non havens, selected fiscal year" # ).to_state() - # print(f'update viz id:{id} title:{state.viz[id].title}') - + ## # print(f'update viz id:{id} title:{state.viz[id].title}') + # Ex Viz 18 # id = "fin_related_and_unrelated_revenues_breakdown" - # algo_data, algo_fig = algo.display_related_and_unrelated_revenues_breakdown(state.data, state.selected_company, int(state.selected_year)) + # algo_data, algo_fig = algo.display_related_and_unrelated_revenues_breakdown(state.data, state.selected_company, int(state.selected_year)) # state.viz[id] = Viz(id=id, # state=state, # data=algo_data, # fig=algo_fig, # title= "Breakdown of revenue between unrelated and related revenue", - # sub_title=f"domestic vs. havens vs. non havens, selected fiscal year: {state.selected_year}", + # sub_title=f"domestic vs. havens vs. non havens, selected fiscal year: {state.selected_year}", # ).to_state() - # print(f'update viz id:{id} title:{state.viz[id].title}') + ## # print(f'update viz id:{id} title:{state.viz[id].title}') + # Ex Viz 19 # id = "fin_tax_haven_used_by_company" # _, algo_data = algo.tax_haven_used_by_company(state.df_selected_company) @@ -261,11 +243,11 @@ def update_viz_year(state: State): # state=state, # data=algo_data, # title="Profits, employees and revenue breakdown by tax haven", - # sub_title=f"selected fiscal year : {state.selected_year}", + # sub_title=f"selected fiscal year : {state.selected_year}", # ).to_state() - # print(f'update viz id:{id} title:{state.viz[id].title}') - - # Ex Viz 21 + ## # print(f'update viz id:{id} title:{state.viz[id].title}') + + # Ex Viz 21 # id = "fin_tax_havens_use_evolution" # state.viz[id] = Viz(id=id, # state=state, @@ -273,19 +255,17 @@ def update_viz_year(state: State): # title="Percentage of profits, percentage of employees and profit per employees over time ", # sub_title=f"domestic vs. havens vs. non havens, selected fiscal year: {state.selected_year}", # ).to_state() - # print(f'update viz id:{id} title:{state.viz[id].title}') - + ## # print(f'update viz id:{id} title:{state.viz[id].title}') + # Update data and figures when the selected company changes - - def on_change_company(state: State): - # print("Chosen company: ", state.selected_company) + # print("Chosen company: ", state.selected_company) update_full(state) # Update data and figures when the selected year changes def on_change_year(state: State): - # print("Chosen year: ", state.selected_year) + # print("Chosen year: ", state.selected_year) update_viz_year(state)