Skip to content

Commit

Permalink
Ignore coverage in areas where we don't currently care about it
Browse files Browse the repository at this point in the history
Ignore registry.py because, while it is covered, the information is not helpful.
Ignore coverage in Canonicalization as there are no real tests
for this currently, and we simply want to make sure we are covering
all the pre-canonicalization logic.
  • Loading branch information
facelessuser committed Sep 8, 2019
1 parent b6d8dcd commit db36b41
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 20 deletions.
7 changes: 7 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[run]
omit=
soupsieve/css_lang/registry.py

[report]
omit=
soupsieve/css_lang/registry.py
40 changes: 20 additions & 20 deletions soupsieve/css_lang/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Language handler."""
from __future__ import unicode_literals
import re
from .registry import registry

Expand All @@ -9,7 +10,6 @@
# Though the ABNF supports 3 extlang tags, any tags that use these
# ranges are invalid, and always will be, per RFC5646.
# Though well-formed, we cannot canonicalize an invalid tag.
# `WELLFORMED_EXTLANG = r"[a-z]{3}(?:-[a-z]{3}){0,2}"`
EXTLANG = r"[a-z]{3}(?:-[a-z]{3}){0,2}"
SCRIPT = r"[a-z]{4}"
REGION = r"(?:[a-z]{2}|[0-9]{3})"
Expand Down Expand Up @@ -74,7 +74,7 @@ def normalize(language):
return RE_WILD_STRIP.sub('-', language).lower()


class Canonicalize(object):
class Canonicalize(object): # pragma: no cover
"""
Canonicalize language tags and ranges.
Expand Down Expand Up @@ -119,7 +119,7 @@ def __init__(self, language, debug=False):
self.is_range = '*' in language
self.language = language

def canonicalize_language(self):
def _canonicalize_language(self):
"""Canonicalize language tags."""

language = self.parts['language']
Expand Down Expand Up @@ -163,7 +163,7 @@ def canonicalize_language(self):
self.parts['language'] = primary
self.parts['extlang'] = ''

def canonicalize_script(self):
def _canonicalize_script(self):
"""Canonicalize script tags."""

script = self.parts['script'].lstrip('-')
Expand All @@ -176,7 +176,7 @@ def canonicalize_script(self):
if value:
self.parts['script'] = '-' + value

def canonicalize_region(self):
def _canonicalize_region(self):
"""Canonicalize region tags."""

region = self.parts['region'].lstrip('-')
Expand All @@ -190,7 +190,7 @@ def canonicalize_region(self):
if value:
self.parts['region'] = '-' + value

def canonicalize_variant(self):
def _canonicalize_variant(self):
"""Canonicalize variant tags."""

variant = self.parts['variant'].lstrip('-')
Expand Down Expand Up @@ -255,7 +255,7 @@ def canonicalize_variant(self):
if variant != ordered:
self.parts['variant'] = '-' + ordered

def canonicalize_extension(self):
def _canonicalize_extension(self):
"""Canonicalize extension."""

extension = self.parts['extension']
Expand All @@ -264,7 +264,7 @@ def canonicalize_extension(self):
extension = '-'.join(sorted(RE_EXT_SPLIT.split(extension.lstrip('-'))))
self.parts['extension'] = '-' + extension if extension else extension

def canonicalize_redundant_grandfathered(self):
def _canonicalize_redundant_grandfathered(self):
"""Canonicalize redundant tags."""

lang = None
Expand All @@ -283,7 +283,7 @@ def canonicalize_redundant_grandfathered(self):
for k, v in RE_LANGUAGETAG.match(lang).groupdict(default='').items():
self.parts[k] = v

def to_extlang_form(self):
def _to_extlang_form(self):
"""Convert to extlang form."""

primary = self.parts['language']
Expand All @@ -293,7 +293,7 @@ def to_extlang_form(self):
self.parts['language'] = r'{}-{}'.format(prefix[0], primary)
self.parts['extlang'] = r'-{}'.format(primary)

def format_case(self):
def _format_case(self):
"""Format case."""

if not self.is_range or not self.parts['language'].startswith('*'):
Expand All @@ -318,17 +318,17 @@ def _canonicalize(self, extlang_form=False):
if self.parts and not self.parts['extlang'].count('-') > 1:
try:
if not m.group('private'):
self.canonicalize_extension()
self.canonicalize_redundant_grandfathered()
self.canonicalize_language()
self.canonicalize_script()
self.canonicalize_region()
self.canonicalize_variant()
self._canonicalize_extension()
self._canonicalize_redundant_grandfathered()
self._canonicalize_language()
self._canonicalize_script()
self._canonicalize_region()
self._canonicalize_variant()

if extlang_form:
self.to_extlang_form()
self._to_extlang_form()

self.format_case()
self._format_case()

# Rebuild the tag. Some variables may be empty strings.
# For instance, when grandfathered is present, nothing else is.
Expand All @@ -343,7 +343,7 @@ def _canonicalize(self, extlang_form=False):
print('Canonicalized: ', lang)
return lang

def canonicalize(self):
def canonicalize(self): # pragma: no cover
"""Canonicalize."""

return self._canonicalize()
Expand Down Expand Up @@ -414,7 +414,7 @@ def extended_filter(lang_range, lang_tags, canonicalize=False):
return matches


def basic_filter(lang_range, lang_tags, canonicalize=False):
def basic_filter(lang_range, lang_tags, canonicalize=False): # pragma: no cover
"""Language tags."""

lang_range = normalize(Canonicalize(lang_range).canonicalize_extlang() if canonicalize else lang_range)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_lang_canonicalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Test language tag/range canonicalization.
```
# grandfathered registrations
'zh-hakka' => "hak"
# canonical and extlang forms
'sgn-jsl' => "jsl" => "sgn-jsl"
# variants
'ja-Latn-hepburn-heploc' => "ja-Latn-hepburn-alalc97"
# etc.
```
"""
from __future__ import unicode_literals
import unittest


class TestLangCanonicalization(unittest.TestCase):
"""Test language canonicalization."""
11 changes: 11 additions & 0 deletions tests/test_level4/test_lang.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ def test_lang(self):
flags=util.HTML
)

def test_lang_missing_range(self):
"""Test language range with a missing range."""

# Implicit wild
self.assert_selector(
self.MARKUP,
"p:lang(de--DE)",
[],
flags=util.HTML
)

def test_explicit_wildcard(self):
"""Test language with explicit wildcard (same as implicit)."""

Expand Down
1 change: 1 addition & 0 deletions tools/gen_iana_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class Redundant(Tag):

with codecs.open('soupsieve/css_lang/registry.py', 'w', encoding='utf-8') as f:
f.write('"""IANA Registry."""\n')
f.write('from __future__ import unicode_literals\n\n')
f.write('registry = {\n')
f.write(' "filedate": {!r},\n'.format(date))
for key in sorted(data.keys()):
Expand Down

0 comments on commit db36b41

Please sign in to comment.