From 4d429d16723de5d0401fa1cf951540969ad9b9a5 Mon Sep 17 00:00:00 2001 From: 7h3Rabbit <62792609+7h3Rabbit@users.noreply.github.com> Date: Wed, 8 Jan 2025 18:26:26 +0100 Subject: [PATCH] Added support for all webperf.se categories as -i sources use for example: python default.py -i help.webprf to see all available categories. --- engines/webperf.py | 70 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/engines/webperf.py b/engines/webperf.py index 76530daf..e0a368d5 100644 --- a/engines/webperf.py +++ b/engines/webperf.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import json import re from engines.utils import use_item from tests.utils import get_http_content @@ -24,22 +25,63 @@ def read_sites(input_url, input_skip, input_take): list: The list of sites read from the specified category on https://webperf.se. """ sites = [] - - if 'offentlig-sektor' in input_url: - input_url = 'https://webperf.se/category/ovrig-offentlig-sektor/' - elif 'kommuner' in input_url: - input_url = 'https://webperf.se/category/kommuner/' - elif 'regioner' in input_url: - input_url = 'https://webperf.se/category/regioner/' - elif 'toplist' in input_url: - input_url = 'https://webperf.se/toplist/' - elif 'digitalt' in input_url: - input_url = 'https://webperf.se/category/digitalt-sverige/' - elif 'webbyraer' in input_url: - input_url = 'https://webperf.se/category/webbyraer/' + all_categories_url = 'https://webperf.se/sites/' + categories_fallback = { + 'offentlig-sektor': '/category/ovrig-offentlig-sektor/', + 'kommuner': '/category/kommuner/', + 'regioner': '/category/regioner/', + 'toplist': '/toplist/', + 'digitalt': '/category/digitalt-sverige/', + 'webbyraer': '/category/webbyraer/' + } + + all_categories_content = get_http_content(all_categories_url) + if all_categories_content != '': + categories = {} + categories_regex = r"