Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ta/fix wordcloud #58

Merged
merged 10 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 90 additions & 16 deletions app/algo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
are needed in Taxplorer tool. Below functions will be used in different pages of the website.
"""

import pandas as pd
import random

import humanize
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import humanize
from wordcloud import WordCloud, get_single_color_func
from PIL import Image, ImageColor, ImageDraw, ImageFont

from app import config as cfg

# Define custom template
custom_template = {
Expand Down Expand Up @@ -925,42 +928,113 @@ def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure:


# Viz 24 : mnc tracked
def _boxes_overlap(first: list, second: list) -> bool:
    """Return True when two ``[left, top, right, bottom]`` boxes intersect."""
    return (
        first[2] > second[0]
        and first[0] < second[2]
        and first[3] > second[1]
        and first[1] < second[3]
    )


def _draw_words(
    draw,
    data: dict,
    font_path: str,
    font_colors: list,
    image_width: int,
    image_height: int,
    margin: int,
    min_font_size: int,
) -> None:
    """Draw every word of ``data`` at a random, non-overlapping position."""
    # Largest text box that still leaves `margin` pixels on every side.
    usable_width = image_width - 2 * margin
    usable_height = image_height - 2 * margin

    # The reference font size is the one at which the most frequent word
    # spans 80% of the image width. The loop is also bounded by the image
    # height so that it always terminates, even for an empty or tiny name.
    most_freq_word = max(data, key=data.get)
    max_width = image_width * 0.8
    max_font_size = min_font_size
    font = ImageFont.truetype(font_path, max_font_size)
    while (
        draw.textlength(most_freq_word, font=font) < max_width
        and max_font_size < image_height
    ):
        max_font_size += 2
        font = ImageFont.truetype(font_path, max_font_size)

    max_frequency = max(data.values())

    # Boxes of the words already drawn, used for collision detection.
    bboxes = []
    for word, frequency in data.items():
        # Font size is proportional to the word frequency.
        font_size = max(
            int(max_font_size * (frequency / max_frequency)), min_font_size
        )

        # Shrink the font until the word fits inside the drawable area.
        # Without this, a long name at a large size makes the random ranges
        # below empty and random.randint raises ValueError.
        while True:
            font = ImageFont.truetype(font_path, font_size)
            left, top, right, bottom = font.getbbox(word)
            word_width = right - left
            word_height = bottom - top
            fits = word_width <= usable_width and word_height <= usable_height
            if fits or font_size <= min_font_size:
                break
            font_size = max(font_size - 2, min_font_size)

        # Even the minimum font size is too large for this image: skip it.
        if not fits:
            continue

        # Try up to 100 random positions; the word is dropped if none is free.
        for _ in range(100):
            x = random.randint(margin, image_width - word_width - margin)
            y = random.randint(margin, image_height - word_height - margin)

            # The box is extended by `margin` on the right and bottom to keep
            # some spacing between neighbouring words.
            bbox = [x, y, x + word_width + margin, y + word_height + margin]
            if any(_boxes_overlap(bbox, placed) for placed in bboxes):
                continue

            bboxes.append(bbox)
            word_color = ImageColor.getrgb(random.choice(font_colors))
            # getbbox() reports the ink box relative to the text anchor, so
            # the anchor is shifted by (left, top) to make the drawn glyphs
            # coincide with the box used for collision detection.
            draw.text((x - left, y - top), word, font=font, fill=word_color)
            break


def mnc_tracked(
    df: pd.DataFrame,
    image_width: int = 1200,
    image_height: int = 1000,
    margin: int = 10,
    min_font_size: int = 10,
) -> go.Figure:
    """Plot the company names as a word cloud sized by number of reports.

    The font size of each company name is proportional to the number of
    distinct years for which a report is available. Words are placed at random
    positions, so the layout differs from one call to the next. Words that
    cannot be placed without overlapping after 100 attempts are left out.

    Args:
        df (pd.DataFrame): CbCRs database. Must contain the columns "mnc"
            and "year".
        image_width (int, optional): Image width in pixel. Defaults to 1200.
        image_height (int, optional): Image height in pixel. Defaults to 1000.
        margin (int, optional): Margin around words in pixel. Defaults to 10.
        min_font_size (int, optional): Minimum fontsize. Defaults to 10.

    Returns:
        go.Figure: word cloud with company name in a Plotly figure. The image
            is blank when ``df`` contains no company.
    """

    # List of colors in hexadecimal format
    font_colors = ["#B8BEDB", "#8087A8", "#080F33", "#181F42", "#424A75"]

    # Create dictionary with company name as key and the number of reports as
    # value, most reported companies first so they are placed first
    data = df.groupby("mnc")["year"].nunique().sort_values(ascending=False).to_dict()

    # Create a blank image
    image = Image.new("RGB", (image_width, image_height), (255, 255, 255))
    draw = ImageDraw.Draw(image)

    # Nothing to draw on an empty database: keep the blank image
    if data:
        _draw_words(
            draw,
            data,
            f"{cfg.FONTS}/roboto/Roboto-Regular.ttf",
            font_colors,
            image_width,
            image_height,
            margin,
            min_font_size,
        )

    # Display the word cloud
    fig = px.imshow(image)

    # Remove hover on image
    fig.update_traces(hoverinfo="skip", hovertemplate="")

    # Remove colorbar
    fig.update_layout(coloraxis_showscale=False)

    # Update axis layouts
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)

    # Update layout setting
    fig.update_layout(template=custom_template, height=400)

    return fig

Expand Down
24 changes: 18 additions & 6 deletions app/css/style.css → app/assets/css/style.css
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
@charset "UTF-8";
@import url("viz.css");
@import url('https://fonts.googleapis.com/css2?family=Manrope:[email protected]&display=swap');
@import url('https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap');

@font-face {
font-family: Manrope;
src: url("../fonts/manrope/Manrope-VariableFont_wght.ttf") format("truetype");
}

@font-face {
font-family: Roboto;
src: url("../fonts/roboto/Roboto-Regular.ttf") format("truetype");
}

.viz-container {
display: flex;
Expand Down Expand Up @@ -272,11 +280,15 @@ color: rgba(0,0,0,0.0)
}

.world-bg {
background-image: url("../../images/world_map.png");
background-image: url("../../assets/images/world_map.jpg");
}

.company-image {
border-radius: 22px;
}

.footer-bg {
background-color: #B8BEDB
background-color: #B8BEDB;
}

/* Taipy classes */
Expand All @@ -285,7 +297,7 @@ color: rgba(0,0,0,0.0)
margin: 0 auto;
background-color: white !important;
box-shadow: none;
-webkit-text-fill-color: initial !important
-webkit-text-fill-color: initial !important;
}

.container {
Expand Down Expand Up @@ -610,7 +622,7 @@ hr.header-hr35 {
}

.world {
background-image: url("../../images/world_map.png");
background-image: url("../../images/world_map.jpg");
background-size: cover;
}

Expand Down
File renamed without changes.
Binary file not shown.
Binary file added app/assets/fonts/roboto/Roboto-Regular.ttf
Binary file not shown.
File renamed without changes
Binary file added app/assets/images/company-image.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
Binary file added app/assets/images/world_map.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# AUTO
DATA = f"{APP}data"
IMAGES = f"{APP}images"
IMAGES = f"{APP}assets/images"
FONTS = f"{APP}assets/fonts"
PAGES = f"{APP}pages"
# DO NOT USE: css path in main is relative to main.py location
# CSS_PATH = f"{APP}css"
Binary file removed app/images/Arrow 1.png
Binary file not shown.
Binary file removed app/images/Arrow_ 1 .png
Binary file not shown.
Binary file removed app/images/bahamas.png
Binary file not shown.
Binary file removed app/images/logo.png
Binary file not shown.
Binary file removed app/images/pexels-ingo-joseph-1880351.png
Binary file not shown.
Binary file removed app/images/viz.png
Binary file not shown.
Binary file removed app/images/world_map.png
Binary file not shown.
4 changes: 2 additions & 2 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# Global variables
# APP
APP_TITLE = "Taxplorer"
FAVICON = "images/taxplorer-logo.svg"
FAVICON = "assets/images/taxplorer-logo.svg"
# DATA
MAX_YEAR_OF_REPORTS = 2021
PATH_TO_DATA = f"{cfg.DATA}/data_final_dataviz.csv"
Expand Down Expand Up @@ -83,7 +83,7 @@ def goto_download(state):


# Initialise Gui with pages and style sheet
gui_multi_pages = Gui(pages=pages, css_file="css/style.css")
gui_multi_pages = Gui(pages=pages, css_file="assets/css/style.css")

# Customize the Stylekit
stylekit = {
Expand Down
2 changes: 1 addition & 1 deletion app/pages/company/company.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ We might have missed out in its report.<br/>
{ .text-blue .text-weight300 .cpt12}
|>

<|{company_image_path}|image|width=100%|>
<|{company_image_path}|image|width=100%|class_name=company-image|>

|>

Expand Down
2 changes: 1 addition & 1 deletion app/pages/company/company.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from app import config as cfg
from app.viz import Viz

company_image_path = f"{cfg.IMAGES}/pexels-ingo-joseph-1880351.png"
company_image_path = f"{cfg.IMAGES}/company-image.jpg"

DEFAULT_COMPANY = "SHELL"

Expand Down
2 changes: 1 addition & 1 deletion app/pages/home/home.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# from app.data.data import data

# Path to images
world_map_path = f"{cfg.IMAGES}/world_map.png"
world_map_path = f"{cfg.IMAGES}/world_map.jpg"
download_icon_path = f"{cfg.IMAGES}/Vector.svg"


Expand Down
2 changes: 1 addition & 1 deletion app/pages/methodology/methodology.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ the highest). The transparency score calculation follows the general formula :
[//]: # ($$\text{Transparency Score} = \sum_{i=1}^{n} w_i \times \frac{\sum_{j\in J_i} | x_{ij} |}{\sum_{j} | x_{ij} |} \times 100\$$)

<|part|class_name=text-center|
<img class="test" src="./images/transparency-score-equation.svg" height="40px"/>
<img class="test" src="./assets/images/transparency-score-equation.svg" height="40px"/>
|>
<br/><br/>

Expand Down
3 changes: 0 additions & 3 deletions app/pages/methodology/methodology.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,5 @@

from app import config as cfg

# Path to equation image
equation = f"{cfg.IMAGES}/transparency-score-equation.svg"

# Generate page from Markdown file
methodology_md = Markdown(f"{cfg.PAGES}/methodology/methodology.md")
16 changes: 8 additions & 8 deletions app/pages/root.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Taxplorer

<|part|class_name=text-center|

<img class="cpb14" src="./images/data4good-logo.svg" height="130px"/>
<img class="cpb14" src="./assets/images/data4good-logo.svg" height="130px"/>

DATA FOR GOOD
{: .h4 .text-blue .text-footer .cpb6 }
Expand All @@ -55,7 +55,7 @@ Website
{: .text-left }

<a class="text-center" href="https://dataforgood.fr/" target="_blank">
<img src="./images/website-logo.svg"/>
<img src="./assets/images/website-logo.svg"/>
</a>
|>

Expand All @@ -64,7 +64,7 @@ Twitter
{: .text-left }

<a class="text-center" href="https://twitter.com/dataforgood_fr" target="_blank">
<img src="./images/twitter-logo.svg"/>
<img src="./assets/images/twitter-logo.svg"/>
</a>
|>

Expand All @@ -73,7 +73,7 @@ LinkedIn
{: .text-left }

<a class="text-center" href="https://www.linkedin.com/company/dataforgood" target="_blank">
<img src="./images/linkedin-logo.svg"/>
<img src="./assets/images/linkedin-logo.svg"/>
</a>
|>

Expand All @@ -87,7 +87,7 @@ LinkedIn

<|part|class_name=text-center|

<img class="cpb14" src="./images/eutax-logo.svg" height="130px"/>
<img class="cpb14" src="./assets/images/eutax-logo.svg" height="130px"/>

EU TAX OBSERVATORY
{: .h4 .text-blue .text-footer .cpb6 }
Expand All @@ -97,7 +97,7 @@ Website
{: .text-left }

<a class="text-center" href="https://www.taxobservatory.eu/" target="_blank">
<img src="./images/website-logo.svg"/>
<img src="./assets/images/website-logo.svg"/>
</a>
|>

Expand All @@ -106,7 +106,7 @@ Twitter
{: .text-left }

<a class="text-center" href="https://twitter.com/taxobservatory" target="_blank">
<img src="./images/twitter-logo.svg"/>
<img src="./assets/images/twitter-logo.svg"/>
</a>
|>

Expand All @@ -115,7 +115,7 @@ LinkedIn
{: .text-left }

<a class="text-center" href="https://www.linkedin.com/company/70917369/" target="_blank">
<img src="./images/linkedin-logo.svg"/>
<img src="./assets/images/linkedin-logo.svg"/>
</a>
|>

Expand Down
Loading
Loading