Fix docstring parsing edge case
brentyi committed Oct 26, 2021
1 parent a32ee3b commit 4bad48e
Showing 3 changed files with 70 additions and 19 deletions.
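
The edge case in question: when a field's default value spans multiple lines, the attribute docstring no longer sits on the line immediately below the field name, so the old one-line lookahead in get_field_docstring() missed it. A minimal sketch of the pattern (illustrative class name; the new test_helptext_hard_string test below exercises exactly this shape):

    import dataclasses

    @dataclasses.dataclass
    class Example:
        x: str = (
            "a default long enough to span lines"
        )
        """Helptext for x: previously missed, now found by scanning forward."""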
67 changes: 48 additions & 19 deletions dcargs/_docstrings.py
@@ -13,7 +13,7 @@
 @dataclasses.dataclass
 class _Token:
     token_type: int
-    token: str
+    content: str
     line_number: int
 
 
@@ -46,20 +46,20 @@ def make(cls) -> "_Tokenization":
                 line_number += 1
                 tokens_from_line[line_number] = []
             elif toktype is not tokenize.INDENT:
-                token = _Token(token_type=toktype, token=tok, line_number=line_number)
+                token = _Token(token_type=toktype, content=tok, line_number=line_number)
                 tokens.append(token)
                 tokens_from_line[line_number].append(token)
 
         prev_field_line_number: int = 1
         for i, token in enumerate(tokens[:-1]):
             if token.token_type == tokenize.NAME:
                 # Naive heuristic for field names
-                # This will currently catch variable/argument annotations as well
                 if (
-                    tokens[i + 1].token == ":"
-                    and token.token not in field_data_from_name
+                    tokens[i + 1].content == ":"
+                    and tokens[i] == tokens_from_line[tokens[i].line_number][0]
+                    and token.content not in field_data_from_name
                 ):
-                    field_data_from_name[token.token] = _FieldData(
+                    field_data_from_name[token.content] = _FieldData(
                         index=i,
                         line_number=token.line_number,
                         prev_field_line_number=prev_field_line_number,
@@ -96,22 +96,51 @@ def get_field_docstring(cls: Type, field_name: str) -> Optional[str]:
     field_data = tokenization.field_data_from_name[field_name]
 
     # Check for docstring-style comment.
-    if (
-        field_data.line_number + 1 in tokenization.tokens_from_line
-        and len(tokenization.tokens_from_line[field_data.line_number + 1]) > 0
+    line_number = field_data.line_number + 1
+    while (
+        line_number in tokenization.tokens_from_line
+        and len(tokenization.tokens_from_line[line_number]) > 0
     ):
-        first_token_on_next_line = tokenization.tokens_from_line[
-            field_data.line_number + 1
-        ][0]
-        if first_token_on_next_line.token_type == tokenize.STRING:
-            docstring = first_token_on_next_line.token.strip()
-            assert docstring.endswith('"""') and docstring.startswith('"""')
-            return _strings.dedent(docstring[3:-3])
+        first_token = tokenization.tokens_from_line[line_number][0]
+        first_token_content = first_token.content.strip()
+
+        # Found a docstring!
+        if (
+            first_token.token_type == tokenize.STRING
+            and first_token_content.startswith('"""')
+            and first_token_content.endswith('"""')
+        ):
+            return _strings.dedent(first_token_content[3:-3])
+
+        # Found the next field.
+        if (
+            first_token.token_type == tokenize.NAME
+            and len(tokenization.tokens_from_line[line_number]) >= 2
+            and tokenization.tokens_from_line[line_number][1].content == ":"
+        ):
+            break
+
+        # Found a method.
+        if first_token.content == "def":
+            break
+
+        line_number += 1
+    # if (
+    #     field_data.line_number + 1 in tokenization.tokens_from_line
+    #     and len(tokenization.tokens_from_line[field_data.line_number + 1]) > 0
+    # ):
+    #     first_token_on_next_line = tokenization.tokens_from_line[
+    #         field_data.line_number + 1
+    #     ][0]
+    #     if first_token_on_next_line.token_type == tokenize.STRING:
+    #         docstring = first_token_on_next_line.token.strip()
+    #         assert docstring.endswith('"""') and docstring.startswith('"""')
+    #         return _strings.dedent(docstring[3:-3])
 
     # Check for comment on the same line as the field.
     final_token_on_line = tokenization.tokens_from_line[field_data.line_number][-1]
     if final_token_on_line.token_type == tokenize.COMMENT:
-        comment: str = final_token_on_line.token
+        comment: str = final_token_on_line.content
         assert comment.startswith("#")
         return comment[1:].strip()
 
@@ -125,8 +154,8 @@ def get_field_docstring(cls: Type, field_name: str) -> Optional[str]:
             comment_token.token_type == tokenize.COMMENT
             and comment_token.line_number > field_data.prev_field_line_number
         ):
-            assert comment_token.token.startswith("#")
-            comments.append(comment_token.token[1:].strip())
+            assert comment_token.content.startswith("#")
+            comments.append(comment_token.content[1:].strip())
         else:
             break
     if len(comments) > 0:
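Two notes on the _docstrings.py changes above. First, the field heuristic in make() now also requires the NAME token to be the first token on its line, so argument annotations in method signatures are no longer misregistered as fields. Second, get_field_docstring() now walks forward line by line rather than checking only the immediate next line, stopping at a docstring, the next field, or a def. A hedged illustration of what the tightened heuristic filters out (hypothetical class, not from this repository):

    import dataclasses

    @dataclasses.dataclass
    class Example:
        x: int = 0  # `x` starts its line, so it is recorded as a field.

        def scale(self, factor: float) -> float:
            # `factor` is followed by ":" but sits mid-line, so the new
            # first-token-on-line check skips it.
            return self.x * factor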
1 change: 1 addition & 0 deletions dcargs/_parse.py
@@ -10,6 +10,7 @@
 
 def parse(
     cls: Type[DataclassType],
+    *,
     description: Optional[str] = None,
     args: Optional[Sequence[str]] = None,
 ) -> DataclassType:
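The bare * added to parse() makes description and args keyword-only. A quick sketch of the calling convention after this change (hypothetical Config dataclass with a single int field x):

    import dcargs

    config = dcargs.parse(Config, args=["--x", "3"])  # OK: keyword arguments.
    # dcargs.parse(Config, None, ["--x", "3"])  # Now raises TypeError: positional args.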
21 changes: 21 additions & 0 deletions tests/test_docstrings.py
@@ -71,3 +71,24 @@ class Config:
     helptext = f.getvalue()
     print(helptext)
     assert " --x INT An optional variable. (default: None)\n" in helptext
+
+
+def test_helptext_hard_string():
+    @dataclasses.dataclass
+    class HelptextHardString:
+        # fmt: off
+        x: str = (
+            "This docstring may be tougher to parse!"
+        )
+        """Helptext."""
+        # fmt: on
+
+    f = io.StringIO()
+    with pytest.raises(SystemExit):
+        with contextlib.redirect_stdout(f):
+            dcargs.parse(HelptextHardString, args=["--help"])
+    helptext = f.getvalue()
+    assert (
+        "--x STR Helptext. (default: This docstring may be tougher to parse!)\n"
+        in helptext
+    )
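
To run only the new test (standard pytest selection; assumes the package is installed in the test environment):

    pytest tests/test_docstrings.py -k test_helptext_hard_string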
