From 41f42a4ef74871b10e55466c64979ed220324b8e Mon Sep 17 00:00:00 2001 From: Jun-Fei Cherng Date: Wed, 17 Apr 2024 12:59:06 +0800 Subject: [PATCH] refactor: scripts/extract_class_names.py with typer Signed-off-by: Jun-Fei Cherng --- requirements.in | 4 +- requirements.txt | 33 +++++---- scripts/extract_class_names.py | 108 +++++++++++++++------------- scripts/extract_class_names_auto.sh | 18 ++--- 4 files changed, 88 insertions(+), 75 deletions(-) diff --git a/requirements.in b/requirements.in index a0c5469..5a82380 100644 --- a/requirements.in +++ b/requirements.in @@ -5,6 +5,6 @@ ruff>=0.3 # for scripts # # ----------- # -fire -requests tinycss2 +typer>=0.12.3,<1 +typing_extensions diff --git a/requirements.txt b/requirements.txt index 8513a74..8cfab3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,25 +1,28 @@ # This file was autogenerated by uv via the following command: # uv pip compile requirements.in -o requirements.txt -certifi==2024.2.2 - # via requests -charset-normalizer==3.3.2 - # via requests -fire==0.6.0 -idna==3.7 - # via requests +click==8.1.7 + # via typer +colorama==0.4.6 + # via click +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py mypy==1.9.0 mypy-extensions==1.0.0 # via mypy -requests==2.31.0 +pygments==2.17.2 + # via rich +rich==13.7.1 + # via typer ruff==0.3.7 -six==1.16.0 - # via fire -termcolor==2.4.0 - # via fire +shellingham==1.5.4 + # via typer tinycss2==1.2.1 +typer==0.12.3 typing-extensions==4.11.0 - # via mypy -urllib3==2.2.1 - # via requests + # via + # mypy + # typer webencodings==0.5.1 # via tinycss2 diff --git a/scripts/extract_class_names.py b/scripts/extract_class_names.py index 842c35d..8afcc87 100644 --- a/scripts/extract_class_names.py +++ b/scripts/extract_class_names.py @@ -1,59 +1,74 @@ +from __future__ import annotations + +import gzip import io import json import sys +import urllib.request +from collections.abc import Generator, Iterable +from enum import Enum from pathlib import Path -from typing import Generator, Iterable, Optional, Set -import fire -import requests -import tinycss2 -import tinycss2.ast +import tinycss2.ast as cssast +import typer +from tinycss2 import parse_stylesheet +from typing_extensions import Annotated # always use \n as line ending for printing sys.stdout = io.TextIOWrapper(sys.stdout.buffer, newline="\n") +class CliOutputFormat(str, Enum): + JSON = "json" + TEXT = "text" + + +app = typer.Typer(add_completion=False) + + +@app.command() def extract_stylesheet_class_names( - path: str, - output_format: str = "json", - lib_name: str = "UnknownLib", - lib_version: str = "0", + path: Annotated[ + str, + typer.Argument(help="The path or URL of the stylesheet."), + ], + output_format: Annotated[ + CliOutputFormat, + typer.Option(help="The output format."), + ] = CliOutputFormat.JSON, + lib_name: Annotated[ + str, + typer.Option(help="The library name."), + ] = "Untitled", + lib_version: Annotated[ + str, + typer.Option(help="The library version."), + ] = "0", ) -> None: - """ - Extract class names from the stylesheet. - - :param path: The path or URL of the stylesheet - :type path: str - :param output_format: The output format - :type output_format: str - :param lib_name: The library name - :type lib_name: str - :param lib_version: The library version - :type lib_version: str - """ - class_names: Set[str] = set() - content = get_file_content(path) - rules = tinycss2.parse_stylesheet(content) + """Extract class names from the stylesheet.""" + if path.startswith(("http://", "https://")): + content = str(simple_urlopen(path), "utf-8") + else: + content = Path(path).read_text(encoding="utf-8") + class_names: set[str] = set() + rules = parse_stylesheet(content) for rule in rules: - if isinstance(rule, tinycss2.ast.AtRule): - if not rule.content: - continue + if isinstance(rule, cssast.AtRule) and rule.content: class_names |= set(find_class_names(rule.content)) continue - - if isinstance(rule, tinycss2.ast.QualifiedRule): + if isinstance(rule, cssast.QualifiedRule): class_names |= set(find_class_names(rule.prelude)) continue class_names_sorted = sorted(class_names) - if output_format == "json": + if output_format is CliOutputFormat.JSON: print( json.dumps( { "name": lib_name, - "version": str(lib_version), + "version": lib_version, "classes": class_names_sorted, }, ensure_ascii=False, @@ -64,32 +79,27 @@ def extract_stylesheet_class_names( print("\n".join(class_names_sorted)) -def find_class_names(nodes: Iterable[tinycss2.ast.Node]) -> Generator[str, None, None]: - prev_node: Optional[tinycss2.ast.Node] = None +def find_class_names(nodes: Iterable[cssast.Node]) -> Generator[str, None, None]: + prev_node: cssast.Node | None = None for node in nodes: if ( - isinstance(prev_node, tinycss2.ast.LiteralToken) + isinstance(prev_node, cssast.LiteralToken) and prev_node.value == "." - and isinstance(node, tinycss2.ast.IdentToken) + and isinstance(node, cssast.IdentToken) ): yield node.value prev_node = node -def get_file_content(path: str) -> str: - """ - Gets the file content. - - :param path: The path or URL of file - :type path: str - - :returns: The file content. - :rtype: str - """ - if path.startswith(("http://", "https://")): - return str(requests.get(path).content, "utf-8") - return Path(path).read_text(encoding="utf-8") +def simple_urlopen(url: str, *, chunk_size: int = 512 * 1024) -> bytes: + with urllib.request.urlopen(url) as resp: + data = b"" + while chunk := resp.read(chunk_size): + data += chunk + if resp.info().get("Content-Encoding") == "gzip": + data = gzip.decompress(data) + return data if __name__ == "__main__": - fire.Fire(extract_stylesheet_class_names) + app() diff --git a/scripts/extract_class_names_auto.sh b/scripts/extract_class_names_auto.sh index 943edb7..024659c 100755 --- a/scripts/extract_class_names_auto.sh +++ b/scripts/extract_class_names_auto.sh @@ -12,27 +12,27 @@ echo "Download and parse Bootstrap 3..." python \ "${SCRIPT_DIR}/extract_class_names.py" \ "https://cdn.jsdelivr.net/npm/bootstrap@3/dist/css/bootstrap.min.css" \ - --output_format="json" \ - --lib_name="Bootstrap" \ - --lib_version="3" \ + --output-format="json" \ + --lib-name="Bootstrap" \ + --lib-version="3" \ >"${DB_DIR}/3.json" echo "Download and parse Bootstrap 4..." python \ "${SCRIPT_DIR}/extract_class_names.py" \ "https://cdn.jsdelivr.net/npm/bootstrap@4/dist/css/bootstrap.min.css" \ - --output_format="json" \ - --lib_name="Bootstrap" \ - --lib_version="4" \ + --output-format="json" \ + --lib-name="Bootstrap" \ + --lib-version="4" \ >"${DB_DIR}/4.json" echo "Download and parse Bootstrap 5..." python \ "${SCRIPT_DIR}/extract_class_names.py" \ "https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" \ - --output_format="json" \ - --lib_name="Bootstrap" \ - --lib_version="5" \ + --output-format="json" \ + --lib-name="Bootstrap" \ + --lib-version="5" \ >"${DB_DIR}/5.json" popd || exit