diff --git a/.gitignore b/.gitignore index 53b16ae8d7..46a50cbef7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ whotracksme.egg-info/ .DS_Store venv/ whotracksme/data/assets/**/*.csv +whotracksme.db diff --git a/.tool-versions b/.tool-versions index cb51a31255..b4736d5dc2 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1 +1 @@ -python 3.10.6 +python 3.11.6 diff --git a/Dockerfile b/Dockerfile index f3f6ce2062..9674af52d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Set base image to build upon -FROM python:3.10-slim +FROM python:3.11-slim # Set arg and env ARG VERSION diff --git a/README.md b/README.md index 6103a071c1..2e22e69961 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Python 3.11 is needed to build the site. We recommend creating a [virtualenv](http://docs.python-guide.org/en/latest/dev/virtualenvs/) to install the dependencies, or use `pipenv` or ) -to +to ```sh python -m venv venv @@ -54,6 +54,13 @@ python -m venv venv After the initial setup, you can proceed with installing whotracks.me. 
+For nushell: + +```nushell +python -m virtualenv venv +overlay use venv/bin/activate.nu +``` + ## With Pip ```sh diff --git a/deploy_to_s3.py b/deploy_to_s3.py index 5df6232a5c..64761b752b 100644 --- a/deploy_to_s3.py +++ b/deploy_to_s3.py @@ -142,6 +142,7 @@ def upload_file_to_s3(path, filename): f'{site_dir}/websites/', f'{site_dir}/companies/', f'{site_dir}/data/', + f'{site_dir}/api/', ] for directory in directories_to_skip: if local_path.startswith(directory): diff --git a/requirements.txt b/requirements.txt index c618c33707..2304eef095 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ requests==2.31.0 six==1.15.0 urllib3==1.26.18 wheel==0.38.1 +simplejson==3.19.2 \ No newline at end of file diff --git a/setup.py b/setup.py index 37c7c9cfe3..0725b599ee 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # This allows us to inject a custom version into setuptools using the # environment variable: WTM_VERSION. If it is not specified, a default value is # used, but you will not be able to run sdist in this case. 
-DEFAULT_VERSION = "dev" +DEFAULT_VERSION = "0.dev" VERSION = os.environ.get("WTM_VERSION", DEFAULT_VERSION) diff --git a/whotracksme/website/build/companies.py b/whotracksme/website/build/companies.py index ff7e0ff117..c26206b61d 100644 --- a/whotracksme/website/build/companies.py +++ b/whotracksme/website/build/companies.py @@ -2,7 +2,7 @@ from collections import defaultdict from jinja2 import Markup -from whotracksme.website.utils import print_progress +from whotracksme.website.utils import print_progress, write_json from whotracksme.website.templates import ( get_template, render_template, @@ -96,6 +96,10 @@ def company_page(template, company_data, data): company_id = company_data['overview']['id'] company_name = get_company_name(company_data) + write_json(f'_site/api/v2/organizations/{data.url_for("company", company_id)}.json', + demographics=company_data, + initials=company_name[:2] + ) with open(f'_site/{data.url_for("company", company_id)}', 'w') as output: output.write(render_template( path_to_root='..', @@ -118,7 +122,10 @@ def build_company_reach_chart_page(data): top100 = company_reach(data.companies, n=100) chart = Markup(overview_bars(top100, highlight=10, height=3000)) template = get_template(data, name='reach-chart-page.html', path_to_root='..') - + write_json('_site/api/v2/organizations.json', + top100=top100, + organizations=data.companies.sort_by('name', descending=False) + ) with open('_site/companies/reach-chart.html', 'w') as output: output.write(render_template( path_to_root='..', diff --git a/whotracksme/website/build/home.py b/whotracksme/website/build/home.py index 9b3fa2521f..75959d21d5 100644 --- a/whotracksme/website/build/home.py +++ b/whotracksme/website/build/home.py @@ -2,7 +2,7 @@ from whotracksme.website.plotting.companies import overview_bars from whotracksme.website.build.companies import company_reach -from whotracksme.website.utils import print_progress +from whotracksme.website.utils import print_progress, write_json from 
whotracksme.website.templates import get_template, render_template from whotracksme.website.build.blog import load_blog_posts @@ -13,16 +13,33 @@ def build_home(data): posts = load_blog_posts()[:3] + tracker_list = data.trackers.sort_by(metric="reach")[:20] + trackers_list_company = data.trackers.sort_by(metric="company_id")[:20] + most_tracked_sites = data.sites.sort_by(metric='trackers')[:20] + least_tracked_sites = data.sites.sort_by(metric='trackers', descending=False)[:20] + websites = data.sites.summary_stats() + tracker_stats = data.trackers.summary_stats() + + write_json('_site/api/v2/index.json', + tracker_list=tracker_list, + trackers_list_company=trackers_list_company, + most_tracked_sites=most_tracked_sites, + least_tracked_sites=least_tracked_sites, + websites=websites, + tracker_stats=tracker_stats, + top10=top10, + ) + with open('_site/index.html', 'w') as output: output.write(render_template( template=get_template(data, "index.html"), ts=header_graph, - tracker_list=data.trackers.sort_by(metric="reach")[:20], - trackers_list_company=data.trackers.sort_by(metric="company_id")[:20], - most_tracked_sites=data.sites.sort_by(metric='trackers')[:20], - least_tracked_sites=data.sites.sort_by(metric='trackers', descending=False)[:20], - websites=data.sites.summary_stats(), - tracker_stats=data.trackers.summary_stats(), + tracker_list=tracker_list, + trackers_list_company=trackers_list_company, + most_tracked_sites=most_tracked_sites, + least_tracked_sites=least_tracked_sites, + websites=websites, + tracker_stats=tracker_stats, top10=top10, posts=posts )) diff --git a/whotracksme/website/build/trackers.py b/whotracksme/website/build/trackers.py index 945c52c47b..9a28aa2f47 100644 --- a/whotracksme/website/build/trackers.py +++ b/whotracksme/website/build/trackers.py @@ -2,7 +2,7 @@ from jinja2 import Markup from whotracksme.data.loader import DataSource -from whotracksme.website.utils import print_progress +from whotracksme.website.utils import 
print_progress, write_json, without_keys from whotracksme.website.templates import ( get_template, render_template, @@ -71,17 +71,24 @@ def site_summary(row): def build_trackers_list(data): + tracker_list = data.trackers.sort_by(metric="reach") + tracker_list_company = data.trackers.sort_by( + metric="company_id", + descending=False + ) + header_stats = data.trackers.summary_stats() with open('_site/trackers.html', 'w') as output: output.write(render_template( template=get_template(data, name="trackers.html"), - tracker_list=data.trackers.sort_by(metric="reach"), - trackers_list_company=data.trackers.sort_by( - metric="company_id", - descending=False - ), - header_stats=data.trackers.summary_stats() + tracker_list=tracker_list, + trackers_list_company=tracker_list_company, + header_stats=header_stats )) - + write_json('_site/api/v2/trackers.json', + tracker_list=tracker_list, + trackers_list_company=tracker_list_company, + header_stats=header_stats + ) print_progress(text="Generate tracker list") @@ -143,4 +150,5 @@ def build_tracker_page_batch(batch): page_data = tracker_page_data(tracker_id, data.trackers.get_datapoint(tracker_id), data) + write_json(f'_site/api/v2/trackers/{tracker_id}.json', **without_keys(page_data, "app")) tracker_page(template, page_data) diff --git a/whotracksme/website/build/websites.py b/whotracksme/website/build/websites.py index c83fe41f4b..56d7f9ed38 100644 --- a/whotracksme/website/build/websites.py +++ b/whotracksme/website/build/websites.py @@ -6,7 +6,7 @@ from jinja2 import Markup from whotracksme.data.loader import DataSource -from whotracksme.website.utils import print_progress +from whotracksme.website.utils import print_progress, write_json from whotracksme.website.build.companies import ( tracker_map_data, website_doughnout, @@ -25,6 +25,12 @@ def build_website_list(data): sorted_websites = data.sites.sort_by(metric='popularity', descending=True) sorted_websites_cat = data.sites.sort_by(metric='category', descending=True) + 
write_json('_site/api/v2/websites.json', + website_list=sorted_websites, + website_list_cat=sorted_websites_cat, + header_numbers=header_numbers + ) + with open('_site/websites.html', 'w') as output: output.write(render_template( template=get_template(data, "websites.html"), @@ -61,6 +67,22 @@ def website_page(template, site, rank, data): # apps per site data tracker_table = list(data.sites.get_tracker_list(site_id)) + write_json('_site/api/v2/websites/{}.json'.format(site.site), + site={ + 'overview': site._asdict() + }, + profile=profile, + methods=methods, + sankey_data=sankey_data, + doughnout={ + "d_values": d_values, + "d_labels": d_labels, + "d_total": d_total, + }, + tracker_categories=d_labels, + tracker_list=tracker_table, + ) + with open('_site/websites/{}.html'.format(site.site), 'w') as output: output.write(render_template( path_to_root='..',
diff --git a/whotracksme/website/utils.py b/whotracksme/website/utils.py
index de12e9c1dc..cc45aa7673 100644
--- a/whotracksme/website/utils.py
+++ b/whotracksme/website/utils.py
@@ -1,5 +1,44 @@
+import datetime
+import os
+import pathlib
+
+import simplejson
 
-def print_progress(text, default_space=40):
-    print("{} {:{}} done".format(text, "." * (default_space - len(text)), default_space - len(text)))
+def write_json(path, **data):
+    """Serialize the keyword arguments as one JSON object and write it to ``path``.
+
+    Parent directories of ``path`` are created when missing. ``date``,
+    ``datetime`` and ``time`` values are written as ISO 8601 strings.
+    """
+    def to_jsonable(o):
+        # datetime.datetime subclasses datetime.date, so plain dates are
+        # covered by the same check instead of falling through to null.
+        if isinstance(o, (datetime.date, datetime.time)):
+            return o.isoformat()
+        # Best effort: any other value the encoder cannot handle is written
+        # as null rather than raising.
+        return None
+
+    pathlib.Path(os.path.dirname(path)).mkdir(parents=True, exist_ok=True)
+    payload = simplejson.dumps(data, indent=2, default=to_jsonable)
+    with open(path, "w", encoding="utf-8") as output:
+        output.write(payload)
 
+def without_keys(d, keys):
+    """Return a copy of ``d`` without ``keys``, keeping the original key order.
+
+    ``keys`` is either a single string key or an iterable of keys. A bare
+    string is treated as one key: ``d.keys() - "app"`` would subtract the
+    characters ``"a"`` and ``"p"`` and leave the ``"app"`` entry in place.
+    """
+    excluded = {keys} if isinstance(keys, str) else set(keys)
+    return {k: v for k, v in d.items() if k not in excluded}
 
+def print_progress(text, default_space=40):
+    """Print ``text``, a dotted filler up to ``default_space`` columns, and ``done``.
+
+    Text longer than ``default_space`` gets an empty filler; a negative width
+    in the format spec would raise ``ValueError``.
+    """
+    filler = "." * max(default_space - len(text), 0)
+    print("{} {} done".format(text, filler))