Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JSON converter #19

Merged
merged 17 commits into from
Jan 12, 2024
2 changes: 2 additions & 0 deletions .github/workflows/.clear-in-built
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.github/*
scripts/*
53 changes: 42 additions & 11 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,51 @@ on:
branches: [ 'main' ]
pull_request:
branches: [ 'main' ]
types: [opened, synchronize, reopened, ready_for_review]

jobs:
validate:
name: Validate XML files with XMLStarlet
build:
name: Build and validate token sheets
runs-on: ubuntu-latest

if: github.event.pull_request.draft == false
steps:
- uses: actions/checkout@v3

- name: Validate 73 tokens
uses: Mudlet/[email protected]
- name: Checkout sheets
uses: actions/checkout@v4

- name: Run build script
run: |
mkdir built
python -m scripts.build

- name: Upload artifact
uses: actions/upload-artifact@v4
with:
args: 'val -b 73.xml'

- name: Validate 8X tokens
uses: Mudlet/[email protected]
name: built
path: built/

commit:
name: Push sheets to built branch
runs-on: ubuntu-latest

permissions: write-all
needs: build

if: github.ref == 'refs/heads/main' && github.event_name == 'push'
steps:
- name: Download artifact
uses: actions/download-artifact@v4
with:
args: 'val -b 8X.xml'
name: built
path: built/

- name: Save to built branch
  uses: s0/git-publish-subdir-action@develop
  env:
    REPO: self
    BRANCH: built
    FOLDER: built
    SKIP_EMPTY_COMMITS: true
    # The globs file is added at .github/workflows/.clear-in-built, so reference that path.
    # NOTE(review): this job only downloads the build artifact and never checks out the
    # repository; confirm the globs file actually exists in the workspace when this step runs.
    CLEAR_GLOBS_FILE: ".github/workflows/.clear-in-built"
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    MESSAGE: "Build {sha}: {msg}"
4 changes: 3 additions & 1 deletion scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .formats import to_json, validate
from .parse import Token, Tokens, OsVersion, OsVersions, Translation
from .tokenide import TokenIDESheet
from .trie import TokenTrie

# Public names of the `scripts` package. This diff view shows two assignments: the single-line one
# is the version being removed, and the two-line one is its replacement, which additionally lists
# "to_json" and "validate". If both are executed, the later assignment is the one that takes effect.
__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation", "TokenIDESheet", "TokenTrie"]
__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation",
"TokenTrie", "TokenIDESheet", "to_json", "validate"]
23 changes: 23 additions & 0 deletions scripts/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json
import xml.etree.ElementTree as ET

from .formats import *


def _build_sheet(name: str, *, for_73: bool = False, emit_json: bool = False) -> None:
    """
    Validates the sheet `<name>.xml` and copies it into the `built/` directory

    The sheet is parsed and validated before any output file is opened, so a sheet that fails
    validation never leaves an empty (truncated) file behind in `built/`.

    :param name: The sheet name without extension, e.g. "8X"
    :param for_73: Whether to use the 73 sheet validator (defaults to False)
    :param emit_json: Whether to also write the JSON conversion to `built/<name>.json`
    """

    with open(f"{name}.xml", encoding="UTF-8") as infile:
        src = infile.read()

    root = ET.fromstring(src)
    validate(root, for_73=for_73)

    # The XML is copied verbatim from the source text rather than re-serialized from the tree
    with open(f"built/{name}.xml", "w", encoding="UTF-8") as outfile:
        outfile.write(src)

    if emit_json:
        with open(f"built/{name}.json", "w", encoding="UTF-8") as outfile:
            json.dump(to_json(root), outfile, indent=2, ensure_ascii=False)


# Runs at module level so that `python -m scripts.build` performs the build, as before
_build_sheet("8X", emit_json=True)
_build_sheet("73", for_73=True)
200 changes: 200 additions & 0 deletions scripts/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import json
import re
import xml.etree.ElementTree as ET

from collections import defaultdict

from .parse import OsVersion, OsVersions


def validate(root: ET.Element, *, for_73: bool = False) -> int:
"""
Validates a token sheet, raising an error if an invalid component is found

:param root: An XML element, which must be the root element of the sheet
:param for_73: Whether to use the 73 sheet validator (defaults to False)
:return: The number of tokens in the sheet
"""

if root.tag != "tokens":
raise ValueError("not a token sheet")

all_tokens = set()
all_names = {}

version = None

def visit(element: ET.Element, byte: str = "", lang: str = ""):
nonlocal version

byte += element.attrib.get("value", "").lstrip("$")
lang += element.attrib.get("code", "")

class ValidationError(ValueError):
__qualname__ = "ValidationError"

def __init__(self, message: str):
super().__init__((f"token 0x{byte}: " if byte else "root: ") + message)

# Require attributes matching regexes
def attributes(attrs: dict[str, str]):
attrib = element.attrib.copy()

for attr, regex in attrs.items():
if attr not in attrib:
raise ValidationError(f"<{element.tag}> does not have attribute {attr}")

if not re.fullmatch(regex, value := attrib.pop(attr)):
raise ValidationError(f"<{element.tag}> {attr} '{value}' does not match r'{regex}'")

if attrib:
raise ValidationError(f"<{element.tag}> has unexpected attribute {[*attrib.values()][0]}")

# Require child tags to match regex when appended in order
def children(regex: str):
if not re.fullmatch(regex, "".join(f"<{child.tag}>" for child in element)):
raise ValidationError(f"children of <{element.tag}> do not match r'{regex}'")

# Require text to match regex
def text(regex: str):
if not re.fullmatch(regex, element.text):
raise ValidationError(f"<{element.tag}> text '{element.text}' does not match r'{regex}'")

# Check requirements for each tag
match element.tag:
case "tokens":
children(r"(<token>|<two-byte>)+")

case "two-byte":
attributes({"value": r"\$[0-9A-F]{2}"})
children(r"(<token>)+")

case "token":
attributes({"value": r"\$[0-9A-F]{2}"})
children(r"(<version>)+")

if byte in all_tokens:
raise ValidationError("token byte must be unique")

all_tokens.add(byte)

case "version":
version = OsVersions.INITIAL
children(r"<since>(<until>)?(<lang>)+")

case "since":
if not for_73:
if (this_version := OsVersion.from_element(element)) < version:
raise ValidationError(f"version {this_version} overlaps with {version}")

version = this_version

# Workaround for nested defaultdict
all_names[version] = all_names.get(version, defaultdict(set))

children(r"<model><os-version>")

case "until":
children(r"<model><os-version>")

case "lang":
attributes({"code": r"[a-z]{2}"} if for_73 else {"code": r"[a-z]{2}", "ti-ascii": r"([0-9A-F]{2})+"})
children(r"<name>" if for_73 else r"<display><accessible>(<variant>)*")

case "name" if for_73:
text(r"[\S\s]+")

case "display":
text(r"[\S\s]+")

case "accessible":
text(r"[\u0000-\u00FF]*")

if element.text in all_names[version][lang]:
raise ValidationError(f"{lang} accessible name '{element.text}' is not unique within {version}")

all_names[version][lang].add(element.text)

case "variant":
text(r".+")

if element.text in all_names[version][lang]:
raise ValidationError(f"{lang} variant name '{element.text}' is not unique within {version}")

all_names[version][lang].add(element.text)

case "model":
text(r"TI-\d\d.*")

case "os-version":
text(r"(\d+\.)+\d+")

case _:
raise ValidationError(f"unrecognized tag <{element.tag}>")

# Visit children
for child in element:
visit(child, byte, lang)

visit(root)
return len(all_tokens)


def to_json(element: ET.Element):
    """
    Converts a token sheet to an equivalent JSON representation

    The shape of the result depends on the element's tag:

    - <tokens> and <two-byte> become a dict keyed by each child's "value" attribute
    - <token> becomes a list with one entry per child
    - <version> becomes a dict of its non-<lang> children plus a "langs" dict keyed by language code
    - <lang> becomes a dict holding "ti-ascii", the text of each child, and any "variants"
    - anything else becomes a dict of its children, or its text if it has none

    :param element: An XML element; call on the root element to convert the entire sheet
    :return: The element and all its descendants as JSON
    """

    tag = element.tag

    if tag in ("tokens", "two-byte"):
        by_value = {}
        for child in element:
            by_value[child.attrib["value"]] = to_json(child)

        return by_value

    if tag == "token":
        return list(map(to_json, element))

    if tag == "version":
        fields = {}
        langs = {}

        for child in element:
            converted = to_json(child)

            if child.tag == "lang":
                langs[child.attrib["code"]] = converted

            else:
                fields[child.tag] = converted

        # Languages always go under a single trailing key
        fields["langs"] = langs
        return fields

    if tag == "lang":
        entry = {"ti-ascii": element.attrib["ti-ascii"]}
        variants = [child.text for child in element if child.tag == "variant"]

        entry.update((child.tag, child.text) for child in element if child.tag != "variant")

        # The "variants" key is only present when at least one variant exists
        if variants:
            entry["variants"] = variants

        return entry

    # Generic element: recurse into children if there are any, otherwise yield the text
    if len(element) > 0:
        return {child.tag: to_json(child) for child in element}

    return element.text


# with open("../8X.xml", encoding="UTF-8") as file:
# json.dumps(to_json(ET.fromstring(file.read())), indent=2)


# Names made available by `from .formats import *`
__all__ = ["to_json", "validate"]
2 changes: 1 addition & 1 deletion scripts/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@


@functools.total_ordering
@dataclass
@dataclass(frozen=True)
class OsVersion:
"""
Data class for defining and comparing OS versions
Expand Down