Skip to content

Commit

Permalink
refactor: scripts/extract_class_names.py with typer
Browse files — browse the repository at this point in the history
Signed-off-by: Jun-Fei Cherng <[email protected]>
  • Loading branch information
jfcherng committed Apr 17, 2024
1 parent 5d666bc commit 41f42a4
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 75 deletions.
4 changes: 2 additions & 2 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ ruff>=0.3
# for scripts #
# ----------- #

fire
requests
tinycss2
typer>=0.12.3,<1
typing_extensions
33 changes: 18 additions & 15 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,28 @@
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in -o requirements.txt
certifi==2024.2.2
# via requests
charset-normalizer==3.3.2
# via requests
fire==0.6.0
idna==3.7
# via requests
click==8.1.7
# via typer
colorama==0.4.6
# via click
markdown-it-py==3.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
mypy==1.9.0
mypy-extensions==1.0.0
# via mypy
requests==2.31.0
pygments==2.17.2
# via rich
rich==13.7.1
# via typer
ruff==0.3.7
six==1.16.0
# via fire
termcolor==2.4.0
# via fire
shellingham==1.5.4
# via typer
tinycss2==1.2.1
typer==0.12.3
typing-extensions==4.11.0
# via mypy
urllib3==2.2.1
# via requests
# via
# mypy
# typer
webencodings==0.5.1
# via tinycss2
108 changes: 59 additions & 49 deletions scripts/extract_class_names.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,74 @@
from __future__ import annotations

import gzip
import io
import json
import sys
import urllib.request
from collections.abc import Generator, Iterable
from enum import Enum
from pathlib import Path
from typing import Generator, Iterable, Optional, Set

import fire
import requests
import tinycss2
import tinycss2.ast
import tinycss2.ast as cssast
import typer
from tinycss2 import parse_stylesheet
from typing_extensions import Annotated

# Always use "\n" as the line ending for printed output: replace sys.stdout
# with a new text wrapper around the same underlying binary buffer, created
# with newline="\n".
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, newline="\n")


class CliOutputFormat(str, Enum):
    """Closed set of output formats accepted by the command-line interface.

    The class mixes in ``str``, so every member is itself a string and
    compares equal to its plain value (``CliOutputFormat.JSON == "json"``).
    """

    # Value spelled "json" on the command line.
    JSON = "json"
    # Value spelled "text" on the command line.
    TEXT = "text"


# Typer application object; the command function below is registered on it
# with @app.command(). It is created with add_completion=False.
app = typer.Typer(add_completion=False)


@app.command()
def extract_stylesheet_class_names(
path: str,
output_format: str = "json",
lib_name: str = "UnknownLib",
lib_version: str = "0",
path: Annotated[
str,
typer.Argument(help="The path or URL of the stylesheet."),
],
output_format: Annotated[
CliOutputFormat,
typer.Option(help="The output format."),
] = CliOutputFormat.JSON,
lib_name: Annotated[
str,
typer.Option(help="The library name."),
] = "Untitled",
lib_version: Annotated[
str,
typer.Option(help="The library version."),
] = "0",
) -> None:
"""
Extract class names from the stylesheet.
:param path: The path or URL of the stylesheet
:type path: str
:param output_format: The output format
:type output_format: str
:param lib_name: The library name
:type lib_name: str
:param lib_version: The library version
:type lib_version: str
"""
class_names: Set[str] = set()
content = get_file_content(path)
rules = tinycss2.parse_stylesheet(content)
"""Extract class names from the stylesheet."""
if path.startswith(("http://", "https://")):
content = str(simple_urlopen(path), "utf-8")
else:
content = Path(path).read_text(encoding="utf-8")

class_names: set[str] = set()
rules = parse_stylesheet(content)
for rule in rules:
if isinstance(rule, tinycss2.ast.AtRule):
if not rule.content:
continue
if isinstance(rule, cssast.AtRule) and rule.content:
class_names |= set(find_class_names(rule.content))
continue

if isinstance(rule, tinycss2.ast.QualifiedRule):
if isinstance(rule, cssast.QualifiedRule):
class_names |= set(find_class_names(rule.prelude))
continue

class_names_sorted = sorted(class_names)

if output_format == "json":
if output_format is CliOutputFormat.JSON:
print(
json.dumps(
{
"name": lib_name,
"version": str(lib_version),
"version": lib_version,
"classes": class_names_sorted,
},
ensure_ascii=False,
Expand All @@ -64,32 +79,27 @@ def extract_stylesheet_class_names(
print("\n".join(class_names_sorted))


def find_class_names(nodes: Iterable[tinycss2.ast.Node]) -> Generator[str, None, None]:
prev_node: Optional[tinycss2.ast.Node] = None
def find_class_names(nodes: Iterable[cssast.Node]) -> Generator[str, None, None]:
prev_node: cssast.Node | None = None
for node in nodes:
if (
isinstance(prev_node, tinycss2.ast.LiteralToken)
isinstance(prev_node, cssast.LiteralToken)
and prev_node.value == "."
and isinstance(node, tinycss2.ast.IdentToken)
and isinstance(node, cssast.IdentToken)
):
yield node.value
prev_node = node


def get_file_content(path: str) -> str:
    """
    Gets the file content.

    A ``path`` starting with ``http://`` or ``https://`` is downloaded;
    anything else is read from the local filesystem. In both cases the
    bytes are decoded as UTF-8.

    :param path: The path or URL of file
    :type path: str

    :returns: The file content.
    :rtype: str

    :raises requests.HTTPError: If the server answers with a 4xx/5xx status.
    :raises requests.Timeout: If the server does not respond in time.
    :raises OSError: If the local file cannot be read.
    """
    if path.startswith(("http://", "https://")):
        # requests applies no timeout by default, so an unresponsive server
        # would block the script forever without this argument.
        response = requests.get(path, timeout=30)
        # Fail loudly on an HTTP error instead of handing an error page to
        # the caller as if it were the requested file.
        response.raise_for_status()
        return str(response.content, "utf-8")
    return Path(path).read_text(encoding="utf-8")
def simple_urlopen(url: str, *, chunk_size: int = 512 * 1024) -> bytes:
    """Fetch ``url`` and return the response body as bytes.

    The body is read in pieces of at most ``chunk_size`` bytes. If the
    response declares a gzip ``Content-Encoding``, the body is decompressed
    before it is returned.

    :param url: The URL to open (passed to ``urllib.request.urlopen``).
    :param chunk_size: Maximum number of bytes requested per read.
    :returns: The (possibly decompressed) response body.
    """
    with urllib.request.urlopen(url) as resp:
        # Collect the pieces and join once at the end; growing a bytes
        # object with ``+=`` copies everything read so far on each chunk.
        chunks = []
        while chunk := resp.read(chunk_size):
            chunks.append(chunk)
        content_encoding = resp.info().get("Content-Encoding", "")

    data = b"".join(chunks)
    # Content-coding names are case-insensitive, and "x-gzip" is a
    # historical alias of "gzip".
    if content_encoding.strip().lower() in ("gzip", "x-gzip"):
        data = gzip.decompress(data)
    return data


if __name__ == "__main__":
fire.Fire(extract_stylesheet_class_names)
app()
18 changes: 9 additions & 9 deletions scripts/extract_class_names_auto.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,27 @@ echo "Download and parse Bootstrap 3..."
python \
"${SCRIPT_DIR}/extract_class_names.py" \
"https://cdn.jsdelivr.net/npm/bootstrap@3/dist/css/bootstrap.min.css" \
--output_format="json" \
--lib_name="Bootstrap" \
--lib_version="3" \
--output-format="json" \
--lib-name="Bootstrap" \
--lib-version="3" \
>"${DB_DIR}/3.json"

echo "Download and parse Bootstrap 4..."
python \
"${SCRIPT_DIR}/extract_class_names.py" \
"https://cdn.jsdelivr.net/npm/bootstrap@4/dist/css/bootstrap.min.css" \
--output_format="json" \
--lib_name="Bootstrap" \
--lib_version="4" \
--output-format="json" \
--lib-name="Bootstrap" \
--lib-version="4" \
>"${DB_DIR}/4.json"

echo "Download and parse Bootstrap 5..."
python \
"${SCRIPT_DIR}/extract_class_names.py" \
"https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" \
--output_format="json" \
--lib_name="Bootstrap" \
--lib_version="5" \
--output-format="json" \
--lib-name="Bootstrap" \
--lib-version="5" \
>"${DB_DIR}/5.json"

popd || exit

0 comments on commit 41f42a4

Please sign in to comment.